diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 57e46ac69568b1efb67942dca3895632509fc32e..0000000000000000000000000000000000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: License Check - -# Controls when the workflow will run -on: - pull_request: - branches: [ kernel5.4/master ] - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel -jobs: - check: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 - - # Setup the python environment - - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - # Check the license of files - - name: Check the license - run: python ./package/default/license_check.py diff --git a/Documentation/Changes b/Documentation/Changes deleted file mode 120000 index 7564ae1682bae84b10e025026dcd080e34dc98ce..0000000000000000000000000000000000000000 --- a/Documentation/Changes +++ /dev/null @@ -1 +0,0 @@ -process/changes.rst \ No newline at end of file diff --git a/Documentation/Changes b/Documentation/Changes new file mode 100644 index 0000000000000000000000000000000000000000..7564ae1682bae84b10e025026dcd080e34dc98ce --- /dev/null +++ b/Documentation/Changes @@ -0,0 +1 @@ +process/changes.rst \ No newline at end of file diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S deleted file mode 100644 index 4aed3f9ab88aef714feb192af32262aae7ba62c7..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1024x768.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 65000 /* kHz */ -#define XPIX 1024 -#define YPIX 768 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 320 -#define YBLANK 38 -#define XOFFSET 8 -#define XPULSE 144 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux XGA" -#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */ -#define HSYNC_POL 0 -#define VSYNC_POL 0 - -#include "edid.S" diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S deleted file mode 100644 index b26dd424cad7cc77d6c1cc79af1348409390a382..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1280x1024.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 108000 /* kHz */ -#define XPIX 1280 -#define YPIX 1024 -#define XY_RATIO XY_RATIO_5_4 -#define XBLANK 408 -#define YBLANK 42 -#define XOFFSET 48 -#define XPULSE 112 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S deleted file mode 100644 index 0d091b282768f2bd7ac4399dbe3bd4d0814a3cb6..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1600x1200.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor - - Copyright (C) 2013 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 162000 /* kHz */ -#define XPIX 1600 -#define YPIX 1200 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 560 -#define YBLANK 50 -#define XOFFSET 64 -#define XPULSE 192 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux UXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S deleted file mode 100644 index 7dfed9a33eab6f5f5c858bd34788d63b380ab5a3..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1680x1050.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 146250 /* kHz */ -#define XPIX 1680 -#define YPIX 1050 -#define XY_RATIO XY_RATIO_16_10 -#define XBLANK 560 -#define YBLANK 39 -#define XOFFSET 104 -#define XPULSE 176 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux WSXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S deleted file mode 100644 index d6ffbba28e95bf1d7829887134d2ae06f0369ba6..0000000000000000000000000000000000000000 --- a/Documentation/EDID/1920x1080.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 148500 /* kHz */ -#define XPIX 1920 -#define YPIX 1080 -#define XY_RATIO XY_RATIO_16_9 -#define XBLANK 280 -#define YBLANK 45 -#define XOFFSET 88 -#define XPULSE 44 -#define YOFFSET 4 -#define YPULSE 5 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux FHD" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S deleted file mode 100644 index a5616588de086453bb814f3a1322920ac6c42511..0000000000000000000000000000000000000000 --- a/Documentation/EDID/800x600.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - 800x600.S: EDID data set for standard 800x600 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - Copyright (C) 2014 Linaro Limited - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 40000 /* kHz */ -#define XPIX 800 -#define YPIX 600 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 256 -#define YBLANK 28 -#define XOFFSET 40 -#define XPULSE 128 -#define YOFFSET 1 -#define YPULSE 4 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SVGA" -#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S deleted file mode 100644 index c3d13815526dbd64b1b6b2d54c9afd31974b7c8a..0000000000000000000000000000000000000000 --- a/Documentation/EDID/edid.S +++ /dev/null @@ -1,274 +0,0 @@ -/* - edid.S: EDID data template - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
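   As a quick sanity check on the parameter sets above (worked
   arithmetic): the vertical refresh a mode yields is
   CLOCK kHz * 1000 / ((XPIX + XBLANK) * (YPIX + YBLANK)).
   For the 1024x768 set: 65,000,000 / (1344 * 806) = 60.0 Hz,
   which matches its VFREQ of 60.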
-*/ - - -/* Manufacturer */ -#define MFG_LNX1 'L' -#define MFG_LNX2 'N' -#define MFG_LNX3 'X' -#define SERIAL 0 -#define YEAR 2012 -#define WEEK 5 - -/* EDID 1.3 standard definitions */ -#define XY_RATIO_16_10 0b00 -#define XY_RATIO_4_3 0b01 -#define XY_RATIO_5_4 0b10 -#define XY_RATIO_16_9 0b11 - -/* Provide defaults for the timing bits */ -#ifndef ESTABLISHED_TIMING1_BITS -#define ESTABLISHED_TIMING1_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING2_BITS -#define ESTABLISHED_TIMING2_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING3_BITS -#define ESTABLISHED_TIMING3_BITS 0x00 -#endif - -#define mfgname2id(v1,v2,v3) \ - ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f)) -#define swap16(v1) ((v1>>8)+((v1&0xff)<<8)) -#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f)) -#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f)) -#define msbs4(v1,v2,v3,v4) \ - ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\ - (((v3>>4)&0x03)<<2)+((v4>>4)&0x03)) -#define pixdpi2mm(pix,dpi) ((pix*25)/dpi) -#define xsize pixdpi2mm(XPIX,DPI) -#define ysize pixdpi2mm(YPIX,DPI) - - .data - -/* Fixed header pattern */ -header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00 - -mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3)) - -prod_code: .hword 0 - -/* Serial number. 32 bits, little endian. */ -serial_number: .long SERIAL - -/* Week of manufacture */ -week: .byte WEEK - -/* Year of manufacture, less 1990. (1990-2245) - If week=255, it is the model year instead */ -year: .byte YEAR-1990 - -version: .byte VERSION /* EDID version, usually 1 (for 1.3) */ -revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */ - -/* If Bit 7=1 Digital input. If set, the following bit definitions apply: - Bits 6-1 Reserved, must be 0 - Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB, - 1 pixel per clock, up to 8 bits per color, MSB aligned, - If Bit 7=0 Analog input. If clear, the following bit definitions apply: - Bits 6-5 Video white and sync levels, relative to blank - 00=+0.7/-0.3 V; 01=+0.714/-0.286 V; - 10=+1.0/-0.4 V; 11=+0.7/0 V - Bit 4 Blank-to-black setup (pedestal) expected - Bit 3 Separate sync supported - Bit 2 Composite sync (on HSync) supported - Bit 1 Sync on green supported - Bit 0 VSync pulse must be serrated when composite or - sync-on-green is used. */ -video_parms: .byte 0x6d - -/* Maximum horizontal image size, in centimetres - (max 292 cm/115 in at 16:9 aspect ratio) */ -max_hor_size: .byte xsize/10 - -/* Maximum vertical image size, in centimetres. - If either byte is 0, undefined (e.g. projector) */ -max_vert_size: .byte ysize/10 - -/* Display gamma, minus 1, times 100 (range 1.00-3.55) */ -gamma: .byte 120 - -/* Bit 7 DPMS standby supported - Bit 6 DPMS suspend supported - Bit 5 DPMS active-off supported - Bits 4-3 Display type: 00=monochrome; 01=RGB colour; - 10=non-RGB multicolour; 11=undefined - Bit 2 Standard sRGB colour space. Bytes 25-34 must contain - sRGB standard values. - Bit 1 Preferred timing mode specified in descriptor block 1. - Bit 0 GTF supported with default parameter values. */ -dsp_features: .byte 0xea - -/* Chromaticity coordinates. */ -/* Red and green least-significant bits - Bits 7-6 Red x value least-significant 2 bits - Bits 5-4 Red y value least-significant 2 bits - Bits 3-2 Green x value least-significant 2 bits - Bits 1-0 Green y value least-significant 2 bits */ -red_green_lsb: .byte 0x5e - -/* Blue and white least-significant 2 bits */ -blue_white_lsb: .byte 0xc0 - -/* Red x value most significant 8 bits.
0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */ -red_x_msb: .byte 0xa4 - -/* Red y value most significant 8 bits */ -red_y_msb: .byte 0x59 - -/* Green x and y value most significant 8 bits */ -green_x_y_msb: .byte 0x4a,0x98 - -/* Blue x and y value most significant 8 bits */ -blue_x_y_msb: .byte 0x25,0x20 - -/* Default white point x and y value most significant 8 bits */ -white_x_y_msb: .byte 0x50,0x54 - -/* Established timings */ -/* Bit 7 720x400 @ 70 Hz - Bit 6 720x400 @ 88 Hz - Bit 5 640x480 @ 60 Hz - Bit 4 640x480 @ 67 Hz - Bit 3 640x480 @ 72 Hz - Bit 2 640x480 @ 75 Hz - Bit 1 800x600 @ 56 Hz - Bit 0 800x600 @ 60 Hz */ -estbl_timing1: .byte ESTABLISHED_TIMING1_BITS - -/* Bit 7 800x600 @ 72 Hz - Bit 6 800x600 @ 75 Hz - Bit 5 832x624 @ 75 Hz - Bit 4 1024x768 @ 87 Hz, interlaced (1024x768) - Bit 3 1024x768 @ 60 Hz - Bit 2 1024x768 @ 72 Hz - Bit 1 1024x768 @ 75 Hz - Bit 0 1280x1024 @ 75 Hz */ -estbl_timing2: .byte ESTABLISHED_TIMING2_BITS - -/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II) - Bits 6-0 Other manufacturer-specific display modes */ -estbl_timing3: .byte ESTABLISHED_TIMING3_BITS - -/* Standard timing */ -/* X resolution, less 31, divided by 8 (256-2288 pixels) */ -std_xres: .byte (XPIX/8)-31 -/* Y resolution, X:Y pixel ratio - Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9. - Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */ -std_vres: .byte (XY_RATIO<<6)+VFREQ-60 - .fill 7,2,0x0101 /* Unused */ - -descriptor1: -/* Pixel clock in 10 kHz units. (0-655.35 MHz, little-endian) */ -clock: .hword CLOCK/10 - -/* Horizontal active pixels 8 lsbits (0-4095) */ -x_act_lsb: .byte XPIX&0xff -/* Horizontal blanking pixels 8 lsbits (0-4095) - End of active to start of next active. */ -x_blk_lsb: .byte XBLANK&0xff -/* Bits 7-4 Horizontal active pixels 4 msbits - Bits 3-0 Horizontal blanking pixels 4 msbits */ -x_msbs: .byte msbs2(XPIX,XBLANK) - -/* Vertical active lines 8 lsbits (0-4095) */ -y_act_lsb: .byte YPIX&0xff -/* Vertical blanking lines 8 lsbits (0-4095) */ -y_blk_lsb: .byte YBLANK&0xff -/* Bits 7-4 Vertical active lines 4 msbits - Bits 3-0 Vertical blanking lines 4 msbits */ -y_msbs: .byte msbs2(YPIX,YBLANK) - -/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */ -x_snc_off_lsb: .byte XOFFSET&0xff -/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */ -x_snc_pls_lsb: .byte XPULSE&0xff -/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63) - Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */ -y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE) -/* Bits 7-6 Horizontal sync offset pixels 2 msbits - Bits 5-4 Horizontal sync pulse width pixels 2 msbits - Bits 3-2 Vertical sync offset lines 2 msbits - Bits 1-0 Vertical sync pulse width lines 2 msbits */ -xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE) - -/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -x_dsp_size: .byte xsize&0xff - -/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -y_dsp_size: .byte ysize&0xff - -/* Bits 7-4 Horizontal display size, mm, 4 msbits - Bits 3-0 Vertical display size, mm, 4 msbits */ -dsp_size_mbsb: .byte msbs2(xsize,ysize) - -/* Horizontal border pixels (each side; total is twice this) */ -x_border: .byte 0 -/* Vertical border lines (each side; total is twice this) */ -y_border: .byte 0 - -/* Bit 7 Interlaced - Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0: - Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar, - sync=1 during left; 11=4-way interleaved stereo - Bit 0=1 2-way interleaved
stereo: 01=Right image on even lines; - 10=Left image on even lines; 11=side-by-side - Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite; - 10=Digital composite (on HSync); 11=Digital separate - Bit 2 If digital separate: Vertical sync polarity (1=positive) - Other types: VSync serrated (HSync during VSync) - Bit 1 If analog sync: Sync on all 3 RGB lines (else green only) - Digital: HSync polarity (1=positive) - Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */ -features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1) - -descriptor2: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xff /* Descriptor is monitor serial number (text) */ - .byte 0 /* Must be zero */ -start1: .ascii "Linux #0" -end1: .byte 0x0a /* End marker */ - .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */ -descriptor3: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfd /* Descriptor is monitor range limits */ - .byte 0 /* Must be zero */ -start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */ - .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */ - .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up - to 10 MHz multiple (10-2550 MHz) */ - .byte 0 /* No extended timing information type */ -end2: .byte 0x0a /* End marker */ - .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */ -descriptor4: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfc /* Descriptor is text */ - .byte 0 /* Must be zero */ -start3: .ascii TIMING_NAME -end3: .byte 0x0a /* End marker */ - .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */ -extensions: .byte 0 /* Number of extensions to follow */ -checksum: .byte CRC /* Sum of all bytes must be 0 */ diff --git a/Documentation/admin-guide/cifs/winucase_convert.pl b/Documentation/admin-guide/cifs/winucase_convert.pl old mode 100755 new mode 100644 diff --git a/Documentation/arm/samsung/clksrc-change-registers.awk b/Documentation/arm/samsung/clksrc-change-registers.awk old mode 100755 new mode 100644 diff --git a/Documentation/devicetree/bindings/sound/rt1308.txt b/Documentation/devicetree/bindings/sound/rt1308.txt old mode 100755 new mode 100644 diff --git a/Documentation/features/list-arch.sh b/Documentation/features/list-arch.sh old mode 100755 new mode 100644 diff --git a/Documentation/features/scripts/features-refresh.sh b/Documentation/features/scripts/features-refresh.sh old mode 100755 new mode 100644 diff --git a/Documentation/sound/cards/multisound.sh b/Documentation/sound/cards/multisound.sh old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl old mode 100755 new mode 100644 diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py old mode 100755 new mode 100644 diff --git a/Documentation/target/target-export-device b/Documentation/target/target-export-device old mode 100755 new mode 100644 diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py old mode 100755 new mode 100644 diff --git a/arch/alpha/boot/head.S b/arch/alpha/boot/head.S deleted file mode 100644 index 
06a7c95fe9ad05878678bc707ef63f02d1773867..0000000000000000000000000000000000000000 --- a/arch/alpha/boot/head.S +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/boot/head.S - * - * initial bootloader stuff.. - */ - -#include - - .set noreorder - .globl __start - .ent __start -__start: - br $29,2f -2: ldgp $29,0($29) - jsr $26,start_kernel - call_pal PAL_halt - .end __start - - .align 5 - .globl wrent - .ent wrent -wrent: - .prologue 0 - call_pal PAL_wrent - ret ($26) - .end wrent - - .align 5 - .globl wrkgp - .ent wrkgp -wrkgp: - .prologue 0 - call_pal PAL_wrkgp - ret ($26) - .end wrkgp - - .align 5 - .globl switch_to_osf_pal - .ent switch_to_osf_pal -switch_to_osf_pal: - subq $30,128,$30 - .frame $30,128,$26 - stq $26,0($30) - stq $1,8($30) - stq $2,16($30) - stq $3,24($30) - stq $4,32($30) - stq $5,40($30) - stq $6,48($30) - stq $7,56($30) - stq $8,64($30) - stq $9,72($30) - stq $10,80($30) - stq $11,88($30) - stq $12,96($30) - stq $13,104($30) - stq $14,112($30) - stq $15,120($30) - .prologue 0 - - stq $30,0($17) /* save KSP in PCB */ - - bis $30,$30,$20 /* a4 = KSP */ - br $17,1f - - ldq $26,0($30) - ldq $1,8($30) - ldq $2,16($30) - ldq $3,24($30) - ldq $4,32($30) - ldq $5,40($30) - ldq $6,48($30) - ldq $7,56($30) - ldq $8,64($30) - ldq $9,72($30) - ldq $10,80($30) - ldq $11,88($30) - ldq $12,96($30) - ldq $13,104($30) - ldq $14,112($30) - ldq $15,120($30) - addq $30,128,$30 - ret ($26) -1: call_pal PAL_swppal - .end switch_to_osf_pal - - .align 3 - .globl tbi - .ent tbi -tbi: - .prologue 0 - call_pal PAL_tbi - ret ($26) - .end tbi - - .align 3 - .globl halt - .ent halt -halt: - .prologue 0 - call_pal PAL_halt - .end halt - -/* $16 - new stack page */ - .align 3 - .globl move_stack - .ent move_stack -move_stack: - .prologue 0 - lda $0, 0x1fff($31) - and $0, $30, $1 /* Stack offset */ - or $1, $16, $16 /* New stack pointer */ - mov $30, $1 - mov $16, $2 -1: ldq $3, 0($1) /* Move the stack */ - addq $1, 8, $1 - stq $3, 0($2) - and $0, $1, $4 - addq $2, 8, $2 - bne $4, 1b - mov $16, $30 - ret ($26) - .end move_stack diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S deleted file mode 100644 index 2e09248f8324258305dee38e88d1bec65fda9f00..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/entry.S +++ /dev/null @@ -1,852 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/entry.S - * - * Kernel entry-points. - */ - -#include -#include -#include -#include -#include - - .text - .set noat - .cfi_sections .debug_frame - -/* Stack offsets. */ -#define SP_OFF 184 -#define SWITCH_STACK_SIZE 320 - -.macro CFI_START_OSF_FRAME func - .align 4 - .globl \func - .type \func,@function -\func: - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 48 - .cfi_rel_offset 64, 8 - .cfi_rel_offset $gp, 16 - .cfi_rel_offset $16, 24 - .cfi_rel_offset $17, 32 - .cfi_rel_offset $18, 40 -.endm - -.macro CFI_END_OSF_FRAME func - .cfi_endproc - .size \func, . - \func -.endm - -/* - * This defines the normal kernel pt-regs layout. - * - * regs 9-15 preserved by C code - * regs 16-18 saved by PAL-code - * regs 29-30 saved and set up by PAL-code - * JRP - Save regs 16-18 in a special area of the stack, so that - * the palcode-provided values are available to the signal handler. 
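 * In C terms, the frame SAVE_ALL builds below looks roughly like this
 * (a sketch inferred from the store offsets; "frame_sketch" is an
 * illustrative name, not a kernel definition):
 *
 *	struct frame_sketch {
 *		unsigned long r0, r1, r2, r3, r4,	   offsets   0.. 32
 *			      r5, r6, r7, r8;		   offsets  40.. 64
 *		unsigned long r19_to_r27[9];		   offsets  72..136
 *		unsigned long r28, hae;			   offsets 144, 152
 *		unsigned long r16, r17, r18;		   offsets 160..176
 *	};				     struct size == SP_OFF == 184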
- */ - -.macro SAVE_ALL - subq $sp, SP_OFF, $sp - .cfi_adjust_cfa_offset SP_OFF - stq $0, 0($sp) - stq $1, 8($sp) - stq $2, 16($sp) - stq $3, 24($sp) - stq $4, 32($sp) - stq $28, 144($sp) - .cfi_rel_offset $0, 0 - .cfi_rel_offset $1, 8 - .cfi_rel_offset $2, 16 - .cfi_rel_offset $3, 24 - .cfi_rel_offset $4, 32 - .cfi_rel_offset $28, 144 - lda $2, alpha_mv - stq $5, 40($sp) - stq $6, 48($sp) - stq $7, 56($sp) - stq $8, 64($sp) - stq $19, 72($sp) - stq $20, 80($sp) - stq $21, 88($sp) - ldq $2, HAE_CACHE($2) - stq $22, 96($sp) - stq $23, 104($sp) - stq $24, 112($sp) - stq $25, 120($sp) - stq $26, 128($sp) - stq $27, 136($sp) - stq $2, 152($sp) - stq $16, 160($sp) - stq $17, 168($sp) - stq $18, 176($sp) - .cfi_rel_offset $5, 40 - .cfi_rel_offset $6, 48 - .cfi_rel_offset $7, 56 - .cfi_rel_offset $8, 64 - .cfi_rel_offset $19, 72 - .cfi_rel_offset $20, 80 - .cfi_rel_offset $21, 88 - .cfi_rel_offset $22, 96 - .cfi_rel_offset $23, 104 - .cfi_rel_offset $24, 112 - .cfi_rel_offset $25, 120 - .cfi_rel_offset $26, 128 - .cfi_rel_offset $27, 136 -.endm - -.macro RESTORE_ALL - lda $19, alpha_mv - ldq $0, 0($sp) - ldq $1, 8($sp) - ldq $2, 16($sp) - ldq $3, 24($sp) - ldq $21, 152($sp) - ldq $20, HAE_CACHE($19) - ldq $4, 32($sp) - ldq $5, 40($sp) - ldq $6, 48($sp) - ldq $7, 56($sp) - subq $20, $21, $20 - ldq $8, 64($sp) - beq $20, 99f - ldq $20, HAE_REG($19) - stq $21, HAE_CACHE($19) - stq $21, 0($20) -99: ldq $19, 72($sp) - ldq $20, 80($sp) - ldq $21, 88($sp) - ldq $22, 96($sp) - ldq $23, 104($sp) - ldq $24, 112($sp) - ldq $25, 120($sp) - ldq $26, 128($sp) - ldq $27, 136($sp) - ldq $28, 144($sp) - addq $sp, SP_OFF, $sp - .cfi_restore $0 - .cfi_restore $1 - .cfi_restore $2 - .cfi_restore $3 - .cfi_restore $4 - .cfi_restore $5 - .cfi_restore $6 - .cfi_restore $7 - .cfi_restore $8 - .cfi_restore $19 - .cfi_restore $20 - .cfi_restore $21 - .cfi_restore $22 - .cfi_restore $23 - .cfi_restore $24 - .cfi_restore $25 - .cfi_restore $26 - .cfi_restore $27 - .cfi_restore $28 - .cfi_adjust_cfa_offset -SP_OFF -.endm - -.macro DO_SWITCH_STACK - bsr $1, do_switch_stack - .cfi_adjust_cfa_offset SWITCH_STACK_SIZE - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - /* We don't really care about the FP registers for debugging. */ -.endm - -.macro UNDO_SWITCH_STACK - bsr $1, undo_switch_stack - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -SWITCH_STACK_SIZE -.endm - -/* - * Non-syscall kernel entry points. - */ - -CFI_START_OSF_FRAME entInt - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $19 - jsr $31, do_entInt -CFI_END_OSF_FRAME entInt - -CFI_START_OSF_FRAME entArith - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $18 - jsr $31, do_entArith -CFI_END_OSF_FRAME entArith - -CFI_START_OSF_FRAME entMM - SAVE_ALL -/* save $9 - $15 so the inline exception code can manipulate them. 
*/ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - addq $sp, 56, $19 -/* handle the fault */ - lda $8, 0x3fff - bic $sp, $8, $8 - jsr $26, do_page_fault -/* reload the registers after the exception code played. */ - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - addq $sp, 56, $sp - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -56 -/* finish up the syscall as normal. */ - br ret_from_sys_call -CFI_END_OSF_FRAME entMM - -CFI_START_OSF_FRAME entIF - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $17 - jsr $31, do_entIF -CFI_END_OSF_FRAME entIF - -CFI_START_OSF_FRAME entUna - lda $sp, -256($sp) - .cfi_adjust_cfa_offset 256 - stq $0, 0($sp) - .cfi_rel_offset $0, 0 - .cfi_remember_state - ldq $0, 256($sp) /* get PS */ - stq $1, 8($sp) - stq $2, 16($sp) - stq $3, 24($sp) - and $0, 8, $0 /* user mode? */ - stq $4, 32($sp) - bne $0, entUnaUser /* yup -> do user-level unaligned fault */ - stq $5, 40($sp) - stq $6, 48($sp) - stq $7, 56($sp) - stq $8, 64($sp) - stq $9, 72($sp) - stq $10, 80($sp) - stq $11, 88($sp) - stq $12, 96($sp) - stq $13, 104($sp) - stq $14, 112($sp) - stq $15, 120($sp) - /* 16-18 PAL-saved */ - stq $19, 152($sp) - stq $20, 160($sp) - stq $21, 168($sp) - stq $22, 176($sp) - stq $23, 184($sp) - stq $24, 192($sp) - stq $25, 200($sp) - stq $26, 208($sp) - stq $27, 216($sp) - stq $28, 224($sp) - mov $sp, $19 - stq $gp, 232($sp) - .cfi_rel_offset $1, 1*8 - .cfi_rel_offset $2, 2*8 - .cfi_rel_offset $3, 3*8 - .cfi_rel_offset $4, 4*8 - .cfi_rel_offset $5, 5*8 - .cfi_rel_offset $6, 6*8 - .cfi_rel_offset $7, 7*8 - .cfi_rel_offset $8, 8*8 - .cfi_rel_offset $9, 9*8 - .cfi_rel_offset $10, 10*8 - .cfi_rel_offset $11, 11*8 - .cfi_rel_offset $12, 12*8 - .cfi_rel_offset $13, 13*8 - .cfi_rel_offset $14, 14*8 - .cfi_rel_offset $15, 15*8 - .cfi_rel_offset $19, 19*8 - .cfi_rel_offset $20, 20*8 - .cfi_rel_offset $21, 21*8 - .cfi_rel_offset $22, 22*8 - .cfi_rel_offset $23, 23*8 - .cfi_rel_offset $24, 24*8 - .cfi_rel_offset $25, 25*8 - .cfi_rel_offset $26, 26*8 - .cfi_rel_offset $27, 27*8 - .cfi_rel_offset $28, 28*8 - .cfi_rel_offset $29, 29*8 - lda $8, 0x3fff - stq $31, 248($sp) - bic $sp, $8, $8 - jsr $26, do_entUna - ldq $0, 0($sp) - ldq $1, 8($sp) - ldq $2, 16($sp) - ldq $3, 24($sp) - ldq $4, 32($sp) - ldq $5, 40($sp) - ldq $6, 48($sp) - ldq $7, 56($sp) - ldq $8, 64($sp) - ldq $9, 72($sp) - ldq $10, 80($sp) - ldq $11, 88($sp) - ldq $12, 96($sp) - ldq $13, 104($sp) - ldq $14, 112($sp) - ldq $15, 120($sp) - /* 16-18 PAL-saved */ - ldq $19, 152($sp) - ldq $20, 160($sp) - ldq $21, 168($sp) - ldq $22, 176($sp) - ldq $23, 184($sp) - ldq $24, 192($sp) - ldq $25, 200($sp) - ldq $26, 208($sp) - ldq $27, 216($sp) - ldq $28, 224($sp) - ldq $gp, 232($sp) - lda $sp, 256($sp) - .cfi_restore $1 - .cfi_restore $2 - .cfi_restore $3 - .cfi_restore $4 - .cfi_restore $5 - .cfi_restore $6 - .cfi_restore $7 - .cfi_restore $8 - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_restore $19 - .cfi_restore 
$20 - .cfi_restore $21 - .cfi_restore $22 - .cfi_restore $23 - .cfi_restore $24 - .cfi_restore $25 - .cfi_restore $26 - .cfi_restore $27 - .cfi_restore $28 - .cfi_restore $29 - .cfi_adjust_cfa_offset -256 - call_pal PAL_rti - - .align 4 -entUnaUser: - .cfi_restore_state - ldq $0, 0($sp) /* restore original $0 */ - lda $sp, 256($sp) /* pop entUna's stack frame */ - .cfi_restore $0 - .cfi_adjust_cfa_offset -256 - SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - .cfi_rel_offset $9, 0 - .cfi_rel_offset $10, 8 - .cfi_rel_offset $11, 16 - .cfi_rel_offset $12, 24 - .cfi_rel_offset $13, 32 - .cfi_rel_offset $14, 40 - .cfi_rel_offset $15, 48 - lda $8, 0x3fff - addq $sp, 56, $19 - bic $sp, $8, $8 - jsr $26, do_entUnaUser - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - lda $sp, 56($sp) - .cfi_restore $9 - .cfi_restore $10 - .cfi_restore $11 - .cfi_restore $12 - .cfi_restore $13 - .cfi_restore $14 - .cfi_restore $15 - .cfi_adjust_cfa_offset -56 - br ret_from_sys_call -CFI_END_OSF_FRAME entUna - -CFI_START_OSF_FRAME entDbg - SAVE_ALL - lda $8, 0x3fff - lda $26, ret_from_sys_call - bic $sp, $8, $8 - mov $sp, $16 - jsr $31, do_entDbg -CFI_END_OSF_FRAME entDbg - -/* - * The system call entry point is special. Most importantly, it looks - * like a function call to userspace as far as clobbered registers. We - * do preserve the argument registers (for syscall restarts) and $26 - * (for leaf syscall functions). - * - * So much for theory. We don't take advantage of this yet. - * - * Note that a0-a2 are not saved by PALcode as with the other entry points. - */ - - .align 4 - .globl entSys - .type entSys, @function - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 48 - .cfi_rel_offset 64, 8 - .cfi_rel_offset $gp, 16 -entSys: - SAVE_ALL - lda $8, 0x3fff - bic $sp, $8, $8 - lda $4, NR_SYSCALLS($31) - stq $16, SP_OFF+24($sp) - lda $5, sys_call_table - lda $27, sys_ni_syscall - cmpult $0, $4, $4 - ldl $3, TI_FLAGS($8) - stq $17, SP_OFF+32($sp) - s8addq $0, $5, $5 - stq $18, SP_OFF+40($sp) - .cfi_rel_offset $16, SP_OFF+24 - .cfi_rel_offset $17, SP_OFF+32 - .cfi_rel_offset $18, SP_OFF+40 -#ifdef CONFIG_AUDITSYSCALL - lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT - and $3, $6, $3 -#endif - bne $3, strace - beq $4, 1f - ldq $27, 0($5) -1: jsr $26, ($27), sys_ni_syscall - ldgp $gp, 0($26) - blt $0, $syscall_error /* the call failed */ - stq $0, 0($sp) - stq $31, 72($sp) /* a3=0 => no error */ - - .align 4 - .globl ret_from_sys_call -ret_from_sys_call: - cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ - ldq $0, SP_OFF($sp) - and $0, 8, $0 - beq $0, ret_to_kernel -ret_to_user: - /* Make sure need_resched and sigpending don't change between - sampling and the rti. */ - lda $16, 7 - call_pal PAL_swpipl - ldl $17, TI_FLAGS($8) - and $17, _TIF_WORK_MASK, $2 - bne $2, work_pending -restore_all: - .cfi_remember_state - RESTORE_ALL - call_pal PAL_rti - -ret_to_kernel: - .cfi_restore_state - lda $16, 7 - call_pal PAL_swpipl - br restore_all - - .align 3 -$syscall_error: - /* - * Some system calls (e.g., ptrace) can return arbitrary - * values which might normally be mistaken as error numbers. - * Those functions must zero $0 (v0) directly in the stack - * frame to indicate that a negative return value wasn't an - * error number.. 
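 *
 * In C terms, the protocol implemented here and consumed by userspace is
 * roughly (an illustrative sketch of the v0/a3 convention, not kernel
 * code):
 *
 *	if (a3 != 0)		   failure: v0 holds the positive errno
 *		errno = v0;
 *	else
 *		result = v0;	   success: v0 is the plain return value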
- */ - ldq $18, 0($sp) /* old syscall nr (zero if success) */ - beq $18, $ret_success - - ldq $19, 72($sp) /* .. and this a3 */ - subq $31, $0, $0 /* with error in v0 */ - addq $31, 1, $1 /* set a3 for errno return */ - stq $0, 0($sp) - mov $31, $26 /* tell "ret_from_sys_call" we can restart */ - stq $1, 72($sp) /* a3 for return */ - br ret_from_sys_call - -$ret_success: - stq $0, 0($sp) - stq $31, 72($sp) /* a3=0 => no error */ - br ret_from_sys_call - -/* - * Do all cleanup when returning from all interrupts and system calls. - * - * Arguments: - * $8: current. - * $17: TI_FLAGS. - * $18: The old syscall number, or zero if this is not a return - * from a syscall that errored and is possibly restartable. - * $19: The old a3 value - */ - - .align 4 - .type work_pending, @function -work_pending: - and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 - bne $2, $work_notifysig - -$work_resched: - /* - * We can get here only if we returned from syscall without SIGPENDING - * or got through work_notifysig already. Either case means no syscall - * restarts for us, so let $18 and $19 burn. - */ - jsr $26, schedule - mov 0, $18 - br ret_to_user - -$work_notifysig: - mov $sp, $16 - DO_SWITCH_STACK - jsr $26, do_work_pending - UNDO_SWITCH_STACK - br restore_all - -/* - * PTRACE syscall handler - */ - - .align 4 - .type strace, @function -strace: - /* set up signal stack, call syscall_trace */ - DO_SWITCH_STACK - jsr $26, syscall_trace_enter /* returns the syscall number */ - UNDO_SWITCH_STACK - - /* get the arguments back.. */ - ldq $16, SP_OFF+24($sp) - ldq $17, SP_OFF+32($sp) - ldq $18, SP_OFF+40($sp) - ldq $19, 72($sp) - ldq $20, 80($sp) - ldq $21, 88($sp) - - /* get the system call pointer.. */ - lda $1, NR_SYSCALLS($31) - lda $2, sys_call_table - lda $27, sys_ni_syscall - cmpult $0, $1, $1 - s8addq $0, $2, $2 - beq $1, 1f - ldq $27, 0($2) -1: jsr $26, ($27), sys_gettimeofday -ret_from_straced: - ldgp $gp, 0($26) - - /* check return.. */ - blt $0, $strace_error /* the call failed */ - stq $31, 72($sp) /* a3=0 => no error */ -$strace_success: - stq $0, 0($sp) /* save return value */ - - DO_SWITCH_STACK - jsr $26, syscall_trace_leave - UNDO_SWITCH_STACK - br $31, ret_from_sys_call - - .align 3 -$strace_error: - ldq $18, 0($sp) /* old syscall nr (zero if success) */ - beq $18, $strace_success - ldq $19, 72($sp) /* .. and this a3 */ - - subq $31, $0, $0 /* with error in v0 */ - addq $31, 1, $1 /* set a3 for errno return */ - stq $0, 0($sp) - stq $1, 72($sp) /* a3 for return */ - - DO_SWITCH_STACK - mov $18, $9 /* save old syscall number */ - mov $19, $10 /* save old a3 */ - jsr $26, syscall_trace_leave - mov $9, $18 - mov $10, $19 - UNDO_SWITCH_STACK - - mov $31, $26 /* tell "ret_from_sys_call" we can restart */ - br ret_from_sys_call -CFI_END_OSF_FRAME entSys - -/* - * Save and restore the switch stack -- aka the balance of the user context. 
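 *
 * For reference, the 320-byte switch stack built by do_switch_stack
 * (summarised from the stores below):
 *
 *	  0.. 48   $9-$15    callee-saved integer registers
 *	     56    $26       return address
 *	 64..304   $f0-$f30  floating-point registers
 *	    312    fpcr      saved in the slot of $f31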
- */ - - .align 4 - .type do_switch_stack, @function - .cfi_startproc simple - .cfi_return_column 64 - .cfi_def_cfa $sp, 0 - .cfi_register 64, $1 -do_switch_stack: - lda $sp, -SWITCH_STACK_SIZE($sp) - .cfi_adjust_cfa_offset SWITCH_STACK_SIZE - stq $9, 0($sp) - stq $10, 8($sp) - stq $11, 16($sp) - stq $12, 24($sp) - stq $13, 32($sp) - stq $14, 40($sp) - stq $15, 48($sp) - stq $26, 56($sp) - stt $f0, 64($sp) - stt $f1, 72($sp) - stt $f2, 80($sp) - stt $f3, 88($sp) - stt $f4, 96($sp) - stt $f5, 104($sp) - stt $f6, 112($sp) - stt $f7, 120($sp) - stt $f8, 128($sp) - stt $f9, 136($sp) - stt $f10, 144($sp) - stt $f11, 152($sp) - stt $f12, 160($sp) - stt $f13, 168($sp) - stt $f14, 176($sp) - stt $f15, 184($sp) - stt $f16, 192($sp) - stt $f17, 200($sp) - stt $f18, 208($sp) - stt $f19, 216($sp) - stt $f20, 224($sp) - stt $f21, 232($sp) - stt $f22, 240($sp) - stt $f23, 248($sp) - stt $f24, 256($sp) - stt $f25, 264($sp) - stt $f26, 272($sp) - stt $f27, 280($sp) - mf_fpcr $f0 # get fpcr - stt $f28, 288($sp) - stt $f29, 296($sp) - stt $f30, 304($sp) - stt $f0, 312($sp) # save fpcr in slot of $f31 - ldt $f0, 64($sp) # don't let "do_switch_stack" change fp state. - ret $31, ($1), 1 - .cfi_endproc - .size do_switch_stack, .-do_switch_stack - - .align 4 - .type undo_switch_stack, @function - .cfi_startproc simple - .cfi_def_cfa $sp, 0 - .cfi_register 64, $1 -undo_switch_stack: - ldq $9, 0($sp) - ldq $10, 8($sp) - ldq $11, 16($sp) - ldq $12, 24($sp) - ldq $13, 32($sp) - ldq $14, 40($sp) - ldq $15, 48($sp) - ldq $26, 56($sp) - ldt $f30, 312($sp) # get saved fpcr - ldt $f0, 64($sp) - ldt $f1, 72($sp) - ldt $f2, 80($sp) - ldt $f3, 88($sp) - mt_fpcr $f30 # install saved fpcr - ldt $f4, 96($sp) - ldt $f5, 104($sp) - ldt $f6, 112($sp) - ldt $f7, 120($sp) - ldt $f8, 128($sp) - ldt $f9, 136($sp) - ldt $f10, 144($sp) - ldt $f11, 152($sp) - ldt $f12, 160($sp) - ldt $f13, 168($sp) - ldt $f14, 176($sp) - ldt $f15, 184($sp) - ldt $f16, 192($sp) - ldt $f17, 200($sp) - ldt $f18, 208($sp) - ldt $f19, 216($sp) - ldt $f20, 224($sp) - ldt $f21, 232($sp) - ldt $f22, 240($sp) - ldt $f23, 248($sp) - ldt $f24, 256($sp) - ldt $f25, 264($sp) - ldt $f26, 272($sp) - ldt $f27, 280($sp) - ldt $f28, 288($sp) - ldt $f29, 296($sp) - ldt $f30, 304($sp) - lda $sp, SWITCH_STACK_SIZE($sp) - ret $31, ($1), 1 - .cfi_endproc - .size undo_switch_stack, .-undo_switch_stack - -/* - * The meat of the context switch code. - */ - - .align 4 - .globl alpha_switch_to - .type alpha_switch_to, @function - .cfi_startproc -alpha_switch_to: - DO_SWITCH_STACK - call_pal PAL_swpctx - lda $8, 0x3fff - UNDO_SWITCH_STACK - bic $sp, $8, $8 - mov $17, $0 - ret - .cfi_endproc - .size alpha_switch_to, .-alpha_switch_to - -/* - * New processes begin life here. - */ - - .globl ret_from_fork - .align 4 - .ent ret_from_fork -ret_from_fork: - lda $26, ret_from_sys_call - mov $17, $16 - jmp $31, schedule_tail -.end ret_from_fork - -/* - * ... and new kernel threads - here - */ - .align 4 - .globl ret_from_kernel_thread - .ent ret_from_kernel_thread -ret_from_kernel_thread: - mov $17, $16 - jsr $26, schedule_tail - mov $9, $27 - mov $10, $16 - jsr $26, ($9) - br $31, ret_to_user -.end ret_from_kernel_thread - - -/* - * Special system calls. Most of these are special in that they - * have to play switch_stack games.
- */ - -.macro fork_like name - .align 4 - .globl alpha_\name - .ent alpha_\name -alpha_\name: - .prologue 0 - bsr $1, do_switch_stack - jsr $26, sys_\name - ldq $26, 56($sp) - lda $sp, SWITCH_STACK_SIZE($sp) - ret -.end alpha_\name -.endm - -fork_like fork -fork_like vfork -fork_like clone - -.macro sigreturn_like name - .align 4 - .globl sys_\name - .ent sys_\name -sys_\name: - .prologue 0 - lda $9, ret_from_straced - cmpult $26, $9, $9 - lda $sp, -SWITCH_STACK_SIZE($sp) - jsr $26, do_\name - bne $9, 1f - jsr $26, syscall_trace_leave -1: br $1, undo_switch_stack - br ret_from_sys_call -.end sys_\name -.endm - -sigreturn_like sigreturn -sigreturn_like rt_sigreturn - - .align 4 - .globl alpha_syscall_zero - .ent alpha_syscall_zero -alpha_syscall_zero: - .prologue 0 - /* Special because it needs to do something opposite to - force_successful_syscall_return(). We use the saved - syscall number for that, zero meaning "not an error". - That works nicely, but for real syscall 0 we need to - make sure that this logic doesn't get confused. - Store a non-zero value there; the -ENOSYS we need in a register - for our return value will do just fine. - */ - lda $0, -ENOSYS - unop - stq $0, 0($sp) - ret -.end alpha_syscall_zero diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S deleted file mode 100644 index bb48a8ae4e79fd9e16a173d7fb1f9f349e2a9845..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/head.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/head.S - * - * initial boot stuff.. At this point, the bootloader has already - * switched into OSF/1 PAL-code, and loaded us at the correct address - * (START_ADDR). So there isn't much left for us to do: just set up - * the kernel global pointer and jump to the kernel entry-point. - */ - -#include -#include -#include -#include - -__HEAD -.globl _stext - .set noreorder - .globl __start - .ent __start -_stext: -__start: - .prologue 0 - br $27,1f -1: ldgp $29,0($27) - /* We need to get current_task_info loaded up... */ - lda $8,init_thread_union - /* ... and find our stack ... */ - lda $30,0x4000 - SIZEOF_PT_REGS($8) - /* ... and then we can start the kernel. */ - jsr $26,start_kernel - call_pal PAL_halt - .end __start - -#ifdef CONFIG_SMP - .align 3 - .globl __smp_callin - .ent __smp_callin - /* On entry here from SRM console, the HWPCB of the per-cpu - slot for this processor has been loaded. We've arranged - for the UNIQUE value for this process to contain the PCBB - of the target idle task. */ -__smp_callin: - .prologue 1 - ldgp $29,0($27) # First order of business, load the GP. - - call_pal PAL_rduniq # Grab the target PCBB. - mov $0,$16 # Install it. - call_pal PAL_swpctx - - lda $8,0x3fff # Find "current". - bic $30,$8,$8 - - jsr $26,smp_callin - call_pal PAL_halt - .end __smp_callin -#endif /* CONFIG_SMP */ - - # - # The following two functions are needed for supporting SRM PALcode - # on the PC164 (at least), since that PALcode manages the interrupt - # masking, and we cannot duplicate the effort without causing problems - # - - .align 3 - .globl cserve_ena - .ent cserve_ena -cserve_ena: - .prologue 0 - bis $16,$16,$17 - lda $16,52($31) - call_pal PAL_cserve - ret ($26) - .end cserve_ena - - .align 3 - .globl cserve_dis - .ent cserve_dis -cserve_dis: - .prologue 0 - bis $16,$16,$17 - lda $16,53($31) - call_pal PAL_cserve - ret ($26) - .end cserve_dis - - # - # It is handy, on occasion, to make halt actually just loop.
- # Putting it here means we dont have to recompile the whole - # kernel. - # - - .align 3 - .globl halt - .ent halt -halt: - .prologue 0 - call_pal PAL_halt - .end halt diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S deleted file mode 100644 index 9704f22ed5e3765d3324b262ee4c6e93d02020d8..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/systbls.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/kernel/systbls.S - * - * The system call table. - */ - -#include - -#define __SYSCALL(nr, entry, nargs) .quad entry - .data - .align 3 - .globl sys_call_table -sys_call_table: -#include -#undef __SYSCALL diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S deleted file mode 100644 index c4b5ceceab52f6a4cea05221cd0f06cb30de79bc..0000000000000000000000000000000000000000 --- a/arch/alpha/kernel/vmlinux.lds.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include - -OUTPUT_FORMAT("elf64-alpha") -OUTPUT_ARCH(alpha) -ENTRY(__start) -PHDRS { kernel PT_LOAD; note PT_NOTE; } -jiffies = jiffies_64; -SECTIONS -{ -#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS - . = 0xfffffc0000310000; -#else - . = 0xfffffc0001010000; -#endif - - _text = .; /* Text and read-only data */ - .text : { - HEAD_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - *(.fixup) - *(.gnu.warning) - } :kernel - swapper_pg_dir = SWAPPER_PGD; - _etext = .; /* End of text section */ - - NOTES :kernel :note - .dummy : { - *(.dummy) - } :kernel - - RODATA - EXCEPTION_TABLE(16) - - /* Will be freed after init */ - __init_begin = ALIGN(PAGE_SIZE); - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) - PERCPU_SECTION(L1_CACHE_BYTES) - /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page - needed for the THREAD_SIZE aligned init_task gets freed after init */ - . = ALIGN(THREAD_SIZE); - __init_end = .; - /* Freed after init ends here */ - - _sdata = .; /* Start of rw data section */ - _data = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - - .got : { - *(.got) - } - .sdata : { - *(.sdata) - } - _edata = .; /* End of data section */ - - BSS_SECTION(0, 0, 0) - _end = .; - - .mdebug 0 : { - *(.mdebug) - } - .note 0 : { - *(.note) - } - - STABS_DEBUG - DWARF_DEBUG - - DISCARDS -} diff --git a/arch/alpha/lib/callback_srm.S b/arch/alpha/lib/callback_srm.S deleted file mode 100644 index b13c4a231f1b7fd088d2338585beb28fd5de8faa..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/callback_srm.S +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/callback_srm.S - */ - -#include -#include - -.text -#define HWRPB_CRB_OFFSET 0xc0 - -#if defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) -.align 4 -srm_dispatch: -#if defined(CONFIG_ALPHA_GENERIC) - ldl $4,alpha_using_srm - beq $4,nosrm -#endif - ldq $0,hwrpb # gp is set up by CALLBACK macro. - ldl $25,0($25) # Pick up the wrapper data. - mov $20,$21 # Shift arguments right. - mov $19,$20 - ldq $1,HWRPB_CRB_OFFSET($0) - mov $18,$19 - mov $17,$18 - mov $16,$17 - addq $0,$1,$2 # CRB address - ldq $27,0($2) # DISPATCH procedure descriptor (VMS call std) - extwl $25,0,$16 # SRM callback function code - ldq $3,8($27) # call address - extwl $25,2,$25 # argument information (VMS calling std) - jmp ($3) # Return directly to caller of wrapper. 
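# A sketch of the wrapper protocol srm_dispatch depends on (inferred from
# the CALLBACK macro below): each callback_NAME expands to
#	br $25, srm_dispatch
#	.word CODE, ARG_CNT
# so on entry $25 points at those two halfwords; extwl $25,0 recovers the
# SRM function code and extwl $25,2 the VMS-style argument count.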
- -.align 4 -.globl srm_fixup -.ent srm_fixup -srm_fixup: - ldgp $29,0($27) -#if defined(CONFIG_ALPHA_GENERIC) - ldl $4,alpha_using_srm - beq $4,nosrm -#endif - ldq $0,hwrpb - ldq $1,HWRPB_CRB_OFFSET($0) - addq $0,$1,$2 # CRB address - ldq $27,16($2) # VA of FIXUP procedure descriptor - ldq $3,8($27) # call address - lda $25,2($31) # two integer arguments - jmp ($3) # Return directly to caller of srm_fixup. -.end srm_fixup - -#if defined(CONFIG_ALPHA_GENERIC) -.align 3 -nosrm: - lda $0,-1($31) - ret -#endif - -#define CALLBACK(NAME, CODE, ARG_CNT) \ -.align 4; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ -ldgp $29,0($27); br $25,srm_dispatch; .word CODE, ARG_CNT; .end callback_##NAME - -#else /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ - -#define CALLBACK(NAME, CODE, ARG_CNT) \ -.align 3; .globl callback_##NAME; .ent callback_##NAME; callback_##NAME##: \ -lda $0,-1($31); ret; .end callback_##NAME - -.align 3 -.globl srm_fixup -.ent srm_fixup -srm_fixup: - lda $0,-1($31) - ret -.end srm_fixup -#endif /* defined(CONFIG_ALPHA_SRM) || defined(CONFIG_ALPHA_GENERIC) */ - -CALLBACK(puts, CCB_PUTS, 4) -CALLBACK(open, CCB_OPEN, 3) -CALLBACK(close, CCB_CLOSE, 2) -CALLBACK(read, CCB_READ, 5) -CALLBACK(open_console, CCB_OPEN_CONSOLE, 1) -CALLBACK(close_console, CCB_CLOSE_CONSOLE, 1) -CALLBACK(getenv, CCB_GET_ENV, 4) -CALLBACK(setenv, CCB_SET_ENV, 4) -CALLBACK(getc, CCB_GETC, 2) -CALLBACK(reset_term, CCB_RESET_TERM, 2) -CALLBACK(term_int, CCB_SET_TERM_INT, 3) -CALLBACK(term_ctl, CCB_SET_TERM_CTL, 3) -CALLBACK(process_keycode, CCB_PROCESS_KEYCODE, 3) -CALLBACK(ioctl, CCB_IOCTL, 6) -CALLBACK(write, CCB_WRITE, 5) -CALLBACK(reset_env, CCB_RESET_ENV, 4) -CALLBACK(save_env, CCB_SAVE_ENV, 1) -CALLBACK(pswitch, CCB_PSWITCH, 3) -CALLBACK(bios_emul, CCB_BIOS_EMUL, 5) - -EXPORT_SYMBOL(callback_getenv) -EXPORT_SYMBOL(callback_setenv) -EXPORT_SYMBOL(callback_save_env) - -.data -__alpha_using_srm: # For use by bootpheader - .long 7 # value is not 1 for link debugging - .weak alpha_using_srm; alpha_using_srm = __alpha_using_srm -__callback_init_done: # For use by bootpheader - .long 7 # value is not 1 for link debugging - .weak callback_init_done; callback_init_done = __callback_init_done - diff --git a/arch/alpha/lib/clear_page.S b/arch/alpha/lib/clear_page.S deleted file mode 100644 index ce02de7b049347b99c7eed9ee7c7deceb41af895..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/clear_page.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/clear_page.S - * - * Zero an entire page. - */ -#include - .text - .align 4 - .global clear_page - .ent clear_page -clear_page: - .prologue 0 - - lda $0,128 - nop - unop - nop - -1: stq $31,0($16) - stq $31,8($16) - stq $31,16($16) - stq $31,24($16) - - stq $31,32($16) - stq $31,40($16) - stq $31,48($16) - subq $0,1,$0 - - stq $31,56($16) - addq $16,64,$16 - unop - bne $0,1b - - ret - nop - unop - nop - - .end clear_page - EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/clear_user.S b/arch/alpha/lib/clear_user.S deleted file mode 100644 index db6c6ca45896c8d3b643cf61df2bf935e8d5a8b7..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/clear_user.S +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/clear_user.S - * Contributed by Richard Henderson - * - * Zero user space, handling exceptions as we go. 
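 *
 * The contract, in C terms (a sketch of the interface as this code
 * implements it):
 *
 *	long __clear_user(void __user *to, long len);
 *
 * zeroes len bytes at to and returns the number of bytes that could
 * not be zeroed ($0 here), 0 on complete success.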
- * - * We have to make sure that $0 is always up-to-date and contains the - * right "bytes left to zero" value (and that it is updated only _after_ - * a successful copy). There is also some rather minor exception setup - * stuff. - */ -#include - -/* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exception-99b($31); \ - .previous - - .set noat - .set noreorder - .align 4 - - .globl __clear_user - .ent __clear_user - .frame $30, 0, $26 - .prologue 0 - -$loop: - and $1, 3, $4 # e0 : - beq $4, 1f # .. e1 : - -0: EX( stq_u $31, 0($16) ) # e0 : zero one word - subq $0, 8, $0 # .. e1 : - subq $4, 1, $4 # e0 : - addq $16, 8, $16 # .. e1 : - bne $4, 0b # e1 : - unop # : - -1: bic $1, 3, $1 # e0 : - beq $1, $tail # .. e1 : - -2: EX( stq_u $31, 0($16) ) # e0 : zero four words - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 8($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 16($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - EX( stq_u $31, 24($16) ) # e0 : - subq $0, 8, $0 # .. e1 : - subq $1, 4, $1 # e0 : - addq $16, 32, $16 # .. e1 : - bne $1, 2b # e1 : - -$tail: - bne $2, 1f # e1 : is there a tail to do? - ret $31, ($26), 1 # .. e1 : - -1: EX( ldq_u $5, 0($16) ) # e0 : - clr $0 # .. e1 : - nop # e1 : - mskqh $5, $0, $5 # e0 : - EX( stq_u $5, 0($16) ) # e0 : - ret $31, ($26), 1 # .. e1 : - -__clear_user: - and $17, $17, $0 - and $16, 7, $4 # e0 : find dest misalignment - beq $0, $zerolength # .. e1 : - addq $0, $4, $1 # e0 : bias counter - and $1, 7, $2 # e1 : number of bytes in tail - srl $1, 3, $1 # e0 : - beq $4, $loop # .. e1 : - - EX( ldq_u $5, 0($16) ) # e0 : load dst word to mask back in - beq $1, $oneword # .. e1 : sub-word store? - - mskql $5, $16, $5 # e0 : take care of misaligned head - addq $16, 8, $16 # .. e1 : - EX( stq_u $5, -8($16) ) # e0 : - addq $0, $4, $0 # .. e1 : bytes left -= 8 - misalignment - subq $1, 1, $1 # e0 : - subq $0, 8, $0 # .. e1 : - br $loop # e1 : - unop # : - -$oneword: - mskql $5, $16, $4 # e0 : - mskqh $5, $2, $5 # e0 : - or $5, $4, $5 # e1 : - EX( stq_u $5, 0($16) ) # e0 : - clr $0 # .. e1 : - -$zerolength: -$exception: - ret $31, ($26), 1 # .. e1 : - - .end __clear_user - EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/copy_page.S b/arch/alpha/lib/copy_page.S deleted file mode 100644 index 5439a30c77d069ac50c7c97ab333e92d5b378ae1..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/copy_page.S +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/copy_page.S - * - * Copy an entire page. 
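 *
 * Equivalent C, for reference (a sketch: the unrolled loop below moves
 * the same 8 KB page as 1024 quadword copies, eight per iteration over
 * 128 iterations):
 *
 *	unsigned long *d = to, *s = from;
 *	for (int i = 0; i < 1024; i++)
 *		d[i] = s[i];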
- */ -#include - .text - .align 4 - .global copy_page - .ent copy_page -copy_page: - .prologue 0 - - lda $18,128 - nop - unop - nop - -1: ldq $0,0($17) - ldq $1,8($17) - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - ldq $6,48($17) - ldq $7,56($17) - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - addq $17,64,$17 - - stq $2,16($16) - stq $3,24($16) - stq $4,32($16) - stq $5,40($16) - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 1b - - ret - nop - unop - nop - - .end copy_page - EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S deleted file mode 100644 index 32ab0344b1853cf2032e76bd8cdbdceaf4454b03..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/copy_user.S +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/copy_user.S - * - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $0 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - */ - -#include - -/* Allow an exception for an insn; exit if we get one. */ -#define EXI(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitin-99b($31); \ - .previous - -#define EXO(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitout-99b($31); \ - .previous - - .set noat - .align 4 - .globl __copy_user - .ent __copy_user -__copy_user: - .prologue 0 - mov $18,$0 - and $16,7,$3 - beq $0,$35 - beq $3,$36 - subq $3,8,$3 - .align 4 -$37: - EXI( ldq_u $1,0($17) ) - EXO( ldq_u $2,0($16) ) - extbl $1,$17,$1 - mskbl $2,$16,$2 - insbl $1,$16,$1 - addq $3,1,$3 - bis $1,$2,$1 - EXO( stq_u $1,0($16) ) - subq $0,1,$0 - addq $16,1,$16 - addq $17,1,$17 - beq $0,$41 - bne $3,$37 -$36: - and $17,7,$1 - bic $0,7,$4 - beq $1,$43 - beq $4,$48 - EXI( ldq_u $3,0($17) ) - .align 4 -$50: - EXI( ldq_u $2,8($17) ) - subq $4,8,$4 - extql $3,$17,$3 - extqh $2,$17,$1 - bis $3,$1,$1 - EXO( stq $1,0($16) ) - addq $17,8,$17 - subq $0,8,$0 - addq $16,8,$16 - bis $2,$2,$3 - bne $4,$50 -$48: - beq $0,$41 - .align 4 -$57: - EXI( ldq_u $1,0($17) ) - EXO( ldq_u $2,0($16) ) - extbl $1,$17,$1 - mskbl $2,$16,$2 - insbl $1,$16,$1 - bis $1,$2,$1 - EXO( stq_u $1,0($16) ) - subq $0,1,$0 - addq $16,1,$16 - addq $17,1,$17 - bne $0,$57 - br $31,$41 - .align 4 -$43: - beq $4,$65 - .align 4 -$66: - EXI( ldq $1,0($17) ) - subq $4,8,$4 - EXO( stq $1,0($16) ) - addq $17,8,$17 - subq $0,8,$0 - addq $16,8,$16 - bne $4,$66 -$65: - beq $0,$41 - EXI( ldq $2,0($17) ) - EXO( ldq $1,0($16) ) - mskql $2,$0,$2 - mskqh $1,$0,$1 - bis $2,$1,$2 - EXO( stq $2,0($16) ) - bis $31,$31,$0 -$41: -$35: -$exitin: -$exitout: - ret $31,($26),1 - - .end __copy_user -EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/csum_ipv6_magic.S b/arch/alpha/lib/csum_ipv6_magic.S deleted file mode 100644 index c7b213ab01abbc35dc32da97c600351b7b1513cc..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/csum_ipv6_magic.S +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/csum_ipv6_magic.S - * Contributed by Richard Henderson - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - 
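 *
 * Reference behaviour in C (a sketch of the one's-complement fold this
 * hand-scheduled code implements; len and proto are byte-swapped to
 * network order first):
 *
 *	sum = csum + len + proto
 *	    + all sixteen 16-bit words of *saddr and *daddr;
 *	while (sum >> 16)
 *		sum = (sum & 0xffff) + (sum >> 16);
 *	return ~sum & 0xffff;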
* Misalignment handling (which costs 16 instructions / 8 cycles) - * added by Ivan Kokshaysky - */ - -#include - .globl csum_ipv6_magic - .align 4 - .ent csum_ipv6_magic - .frame $30,0,$26,0 -csum_ipv6_magic: - .prologue 0 - - ldq_u $0,0($16) # e0 : load src & dst addr words - zapnot $20,15,$20 # .. e1 : zero extend incoming csum - extqh $18,1,$4 # e0 : byte swap len & proto while we wait - ldq_u $21,7($16) # .. e1 : handle misalignment - - extbl $18,1,$5 # e0 : - ldq_u $1,8($16) # .. e1 : - extbl $18,2,$6 # e0 : - ldq_u $22,15($16) # .. e1 : - - extbl $18,3,$18 # e0 : - ldq_u $2,0($17) # .. e1 : - sra $4,32,$4 # e0 : - ldq_u $23,7($17) # .. e1 : - - extql $0,$16,$0 # e0 : - ldq_u $3,8($17) # .. e1 : - extqh $21,$16,$21 # e0 : - ldq_u $24,15($17) # .. e1 : - - sll $5,16,$5 # e0 : - or $0,$21,$0 # .. e1 : 1st src word complete - extql $1,$16,$1 # e0 : - addq $20,$0,$20 # .. e1 : begin summing the words - - extqh $22,$16,$22 # e0 : - cmpult $20,$0,$0 # .. e1 : - sll $6,8,$6 # e0 : - or $1,$22,$1 # .. e1 : 2nd src word complete - - extql $2,$17,$2 # e0 : - or $4,$18,$18 # .. e1 : - extqh $23,$17,$23 # e0 : - or $5,$6,$5 # .. e1 : - - extql $3,$17,$3 # e0 : - or $2,$23,$2 # .. e1 : 1st dst word complete - extqh $24,$17,$24 # e0 : - or $18,$5,$18 # .. e1 : len complete - - extwh $19,7,$7 # e0 : - or $3,$24,$3 # .. e1 : 2nd dst word complete - extbl $19,1,$19 # e0 : - addq $20,$1,$20 # .. e1 : - - or $19,$7,$19 # e0 : - cmpult $20,$1,$1 # .. e1 : - sll $19,48,$19 # e0 : - nop # .. e0 : - - sra $19,32,$19 # e0 : proto complete - addq $20,$2,$20 # .. e1 : - cmpult $20,$2,$2 # e0 : - addq $20,$3,$20 # .. e1 : - - cmpult $20,$3,$3 # e0 : - addq $20,$18,$20 # .. e1 : - cmpult $20,$18,$18 # e0 : - addq $20,$19,$20 # .. e1 : - - cmpult $20,$19,$19 # e0 : - addq $0,$1,$0 # .. e1 : merge the carries back into the csum - addq $2,$3,$2 # e0 : - addq $18,$19,$18 # .. e1 : - - addq $0,$2,$0 # e0 : - addq $20,$18,$20 # .. e1 : - addq $0,$20,$0 # e0 : - unop # : - - extwl $0,2,$2 # e0 : begin folding the 64-bit value - zapnot $0,3,$3 # .. e1 : - extwl $0,4,$1 # e0 : - addq $2,$3,$3 # .. e1 : - - extwl $0,6,$0 # e0 : - addq $3,$1,$3 # .. e1 : - addq $0,$3,$0 # e0 : - unop # : - - extwl $0,2,$1 # e0 : fold 18-bit value - zapnot $0,3,$0 # .. e1 : - addq $0,$1,$0 # e0 : - unop # : - - extwl $0,2,$1 # e0 : fold 17-bit value - zapnot $0,3,$0 # .. e1 : - addq $0,$1,$0 # e0 : - not $0,$0 # .. e1 : and complement. - - zapnot $0,3,$0 # e0 : - ret # .. e1 : - - .end csum_ipv6_magic - EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/dbg_current.S b/arch/alpha/lib/dbg_current.S deleted file mode 100644 index be66121312774918505fc1cd3fd9b85463799e30..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/dbg_current.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/dbg_current.S - * Contributed by Richard Henderson (rth@cygnus.com) - * - * Trap if we find current not correct. 
- */
-
-#include <asm/pal.h>
-
- .text
- .set noat
-
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- lda $0, -0x4000($30)
- cmpult $8, $30, $1
- cmpule $0, $30, $2
- and $1, $2, $3
- bne $3, 1f
-
- call_pal PAL_bugchk
-
-1: ret $31, ($28), 1
-
- .end _mcount
diff --git a/arch/alpha/lib/dbg_stackcheck.S b/arch/alpha/lib/dbg_stackcheck.S
deleted file mode 100644
index b3b6fc94f7f321439f60ba0bcb853eb0af9636a0..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/dbg_stackcheck.S
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/dbg_stackcheck.S
- * Contributed by Richard Henderson (rth@tamu.edu)
- *
- * Verify that we have not overflowed the stack. Oops if we have.
- */
-
-#include <asm/asm-offsets.h>
-
- .text
- .set noat
-
- .align 3
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- lda $0, TASK_SIZE($8)
- cmpult $30, $0, $0
- bne $0, 1f
- ret ($28)
-1: stq $31, -8($31) # oops me, damn it.
- br 1b
-
- .end _mcount
diff --git a/arch/alpha/lib/dbg_stackkill.S b/arch/alpha/lib/dbg_stackkill.S
deleted file mode 100644
index 6d9197e52a425af943920556f8572b21e5b548cc..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/dbg_stackkill.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/dbg_stackkill.S
- * Contributed by Richard Henderson (rth@cygnus.com)
- *
- * Clobber the balance of the kernel stack, hoping to catch
- * uninitialized local variables in the act.
- */
-
-#include <asm/asm-offsets.h>
-
- .text
- .set noat
-
- .align 5
- .globl _mcount
- .ent _mcount
-_mcount:
- .frame $30, 0, $28, 0
- .prologue 0
-
- ldi $0, 0xdeadbeef
- lda $2, -STACK_SIZE
- sll $0, 32, $1
- and $30, $2, $2
- or $0, $1, $0
- lda $2, TASK_SIZE($2)
- cmpult $2, $30, $1
- beq $1, 2f
-1: stq $0, 0($2)
- addq $2, 8, $2
- cmpult $2, $30, $1
- bne $1, 1b
-2: ret ($28)
-
- .end _mcount
diff --git a/arch/alpha/lib/divide.S b/arch/alpha/lib/divide.S
deleted file mode 100644
index 2b60eb45e50b68993b6225c8c7f23cacedd00651..0000000000000000000000000000000000000000
--- a/arch/alpha/lib/divide.S
+++ /dev/null
@@ -1,199 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/alpha/lib/divide.S
- *
- * (C) 1995 Linus Torvalds
- *
- * Alpha division..
- */
-
-/*
- * The alpha chip doesn't provide hardware division, so we have to do it
- * by hand. The compiler expects the functions
- *
- * __divqu: 64-bit unsigned long divide
- * __remqu: 64-bit unsigned long remainder
- * __divqs/__remqs: signed 64-bit
- * __divlu/__remlu: unsigned 32-bit
- * __divls/__remls: signed 32-bit
- *
- * These are not normal C functions: instead of the normal
- * calling sequence, these expect their arguments in registers
- * $24 and $25, and return the result in $27. Register $28 may
- * be clobbered (assembly temporary), anything else must be saved.
- *
- * In short: painful.
- *
- * This is a rather simple bit-at-a-time algorithm: it's very good
- * at dividing random 64-bit numbers, but the more usual case where
- * the divisor is small is handled better by the DEC algorithm
- * using lookup tables. This uses much less memory, though, and is
- * nicer on the cache.. Besides, I don't know the copyright status
- * of the DEC code.
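
(Aside: the shift-and-subtract scheme just described is easier to follow in C. The sketch below uses a hypothetical name, divqu_sketch, and an ordinary C calling convention rather than the $24/$25/$27 register protocol the comment documents.)

    #include <stdint.h>

    static uint64_t divqu_sketch(uint64_t dividend, uint64_t divisor,
                                 uint64_t *remainder)
    {
        uint64_t quotient = 0, mask = 1;

        if (divisor == 0) {              /* the asm simply returns on divide-by-zero */
            *remainder = dividend;
            return 0;
        }
        /* "Go left": shift the divisor and the quotient-bit mask up until
           the divisor tops the dividend (or would lose its high bit). */
        while (divisor < dividend && !(divisor >> 63)) {
            divisor <<= 1;
            mask <<= 1;
        }
        /* "Go right again": subtract wherever the shifted divisor fits. */
        while (mask) {
            if (divisor <= dividend) {
                dividend -= divisor;
                quotient |= mask;
            }
            divisor >>= 1;
            mask >>= 1;
        }
        *remainder = dividend;           /* the modulus ends up in 'dividend' */
        return quotient;
    }

The asm keeps both results live through the loop and selects which one to return with the DIV/MOD macros; the C version simply returns both.
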
- */ - -/* - * My temporaries: - * $0 - current bit - * $1 - shifted divisor - * $2 - modulus/quotient - * - * $23 - return address - * $24 - dividend - * $25 - divisor - * - * $27 - quotient/modulus - * $28 - compare status - */ - -#include -#define halt .long 0 - -/* - * Select function type and registers - */ -#define mask $0 -#define divisor $1 -#define compare $28 -#define tmp1 $3 -#define tmp2 $4 - -#ifdef DIV -#define DIV_ONLY(x,y...) x,##y -#define MOD_ONLY(x,y...) -#define func(x) __div##x -#define modulus $2 -#define quotient $27 -#define GETSIGN(x) xor $24,$25,x -#define STACK 48 -#else -#define DIV_ONLY(x,y...) -#define MOD_ONLY(x,y...) x,##y -#define func(x) __rem##x -#define modulus $27 -#define quotient $2 -#define GETSIGN(x) bis $24,$24,x -#define STACK 32 -#endif - -/* - * For 32-bit operations, we need to extend to 64-bit - */ -#ifdef INTSIZE -#define ufunction func(lu) -#define sfunction func(l) -#define LONGIFY(x) zapnot x,15,x -#define SLONGIFY(x) addl x,0,x -#else -#define ufunction func(qu) -#define sfunction func(q) -#define LONGIFY(x) -#define SLONGIFY(x) -#endif - -.set noat -.align 3 -.globl ufunction -.ent ufunction -ufunction: - subq $30,STACK,$30 - .frame $30,STACK,$23 - .prologue 0 - -7: stq $1, 0($30) - bis $25,$25,divisor - stq $2, 8($30) - bis $24,$24,modulus - stq $0,16($30) - bis $31,$31,quotient - LONGIFY(divisor) - stq tmp1,24($30) - LONGIFY(modulus) - bis $31,1,mask - DIV_ONLY(stq tmp2,32($30)) - beq divisor, 9f /* div by zero */ - -#ifdef INTSIZE - /* - * shift divisor left, using 3-bit shifts for - * 32-bit divides as we can't overflow. Three-bit - * shifts will result in looping three times less - * here, but can result in two loops more later. - * Thus using a large shift isn't worth it (and - * s8add pairs better than a sll..) - */ -1: cmpult divisor,modulus,compare - s8addq divisor,$31,divisor - s8addq mask,$31,mask - bne compare,1b -#else -1: cmpult divisor,modulus,compare - blt divisor, 2f - addq divisor,divisor,divisor - addq mask,mask,mask - bne compare,1b - unop -#endif - - /* ok, start to go right again.. */ -2: DIV_ONLY(addq quotient,mask,tmp2) - srl mask,1,mask - cmpule divisor,modulus,compare - subq modulus,divisor,tmp1 - DIV_ONLY(cmovne compare,tmp2,quotient) - srl divisor,1,divisor - cmovne compare,tmp1,modulus - bne mask,2b - -9: ldq $1, 0($30) - ldq $2, 8($30) - ldq $0,16($30) - ldq tmp1,24($30) - DIV_ONLY(ldq tmp2,32($30)) - addq $30,STACK,$30 - ret $31,($23),1 - .end ufunction -EXPORT_SYMBOL(ufunction) - -/* - * Uhh.. Ugly signed division. I'd rather not have it at all, but - * it's needed in some circumstances. There are different ways to - * handle this, really. This does: - * -a / b = a / -b = -(a / b) - * -a % b = -(a % b) - * a % -b = a % b - * which is probably not the best solution, but at least should - * have the property that (x/y)*y + (x%y) = x. 
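
(Aside: in C, the sign fix-up the code wraps around the unsigned routine looks roughly like this — a sketch using divqu_sketch from the previous aside, with illustrative names rather than the real __divqs/__remqs symbols.)

    #include <stdint.h>

    /* -a / b = a / -b = -(a / b);  -a % b = -(a % b);  a % -b = a % b */
    static int64_t divqs_sketch(int64_t a, int64_t b)
    {
        uint64_t r;
        uint64_t q = divqu_sketch(a < 0 ? -(uint64_t)a : (uint64_t)a,
                                  b < 0 ? -(uint64_t)b : (uint64_t)b, &r);
        /* GETSIGN: the quotient is negative iff the signs differ (xor $24,$25) */
        return ((a ^ b) < 0) ? -(int64_t)q : (int64_t)q;
    }

    static int64_t remqs_sketch(int64_t a, int64_t b)
    {
        uint64_t r;
        divqu_sketch(a < 0 ? -(uint64_t)a : (uint64_t)a,
                     b < 0 ? -(uint64_t)b : (uint64_t)b, &r);
        /* the remainder takes the sign of the dividend (bis $24,$24) */
        return (a < 0) ? -(int64_t)r : (int64_t)r;
    }

For example, -7/2 gives -3 with remainder -1, and (-3)*2 + (-1) = -7, satisfying the property claimed above.
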
- */ -.align 3 -.globl sfunction -.ent sfunction -sfunction: - subq $30,STACK,$30 - .frame $30,STACK,$23 - .prologue 0 - bis $24,$25,$28 - SLONGIFY($28) - bge $28,7b - stq $24,0($30) - subq $31,$24,$28 - stq $25,8($30) - cmovlt $24,$28,$24 /* abs($24) */ - stq $23,16($30) - subq $31,$25,$28 - stq tmp1,24($30) - cmovlt $25,$28,$25 /* abs($25) */ - unop - bsr $23,ufunction - ldq $24,0($30) - ldq $25,8($30) - GETSIGN($28) - subq $31,$27,tmp1 - SLONGIFY($28) - ldq $23,16($30) - cmovlt $28,tmp1,$27 - ldq tmp1,24($30) - addq $30,STACK,$30 - ret $31,($23),1 - .end sfunction -EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-clear_page.S b/arch/alpha/lib/ev6-clear_page.S deleted file mode 100644 index 325864c81586daa94d1b04534ed54016fadad6d3..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-clear_page.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-clear_page.S - * - * Zero an entire page. - */ -#include - .text - .align 4 - .global clear_page - .ent clear_page -clear_page: - .prologue 0 - - lda $0,128 - lda $1,125 - addq $16,64,$2 - addq $16,128,$3 - - addq $16,192,$17 - wh64 ($16) - wh64 ($2) - wh64 ($3) - -1: wh64 ($17) - stq $31,0($16) - subq $0,1,$0 - subq $1,1,$1 - - stq $31,8($16) - stq $31,16($16) - addq $17,64,$2 - nop - - stq $31,24($16) - stq $31,32($16) - cmovgt $1,$2,$17 - nop - - stq $31,40($16) - stq $31,48($16) - nop - nop - - stq $31,56($16) - addq $16,64,$16 - nop - bne $0,1b - - ret - nop - nop - nop - - .end clear_page - EXPORT_SYMBOL(clear_page) diff --git a/arch/alpha/lib/ev6-clear_user.S b/arch/alpha/lib/ev6-clear_user.S deleted file mode 100644 index 7e644f83cdf2905807b15b6cd788770c1be732d4..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-clear_user.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-clear_user.S - * 21264 version contributed by Rick Gorton - * - * Zero user space, handling exceptions as we go. - * - * We have to make sure that $0 is always up-to-date and contains the - * right "bytes left to zero" value (and that it is updated only _after_ - * a successful copy). There is also some rather minor exception setup - * stuff. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Determining actual stalls (other than slotting) doesn't appear to be easy to do. - * From perusing the source code context where this routine is called, it is - * a fair assumption that significant fractions of entire pages are zeroed, so - * it's going to be worth the effort to hand-unroll a big loop, and use wh64. - * ASSUMPTION: - * The believed purpose of only updating $0 after a store is that a signal - * may come along during the execution of this chunk of code, and we don't - * want to leave a hole (and we also want to avoid repeating lots of work) - */ - -#include -/* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) 
\ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exception-99b($31); \ - .previous - - .set noat - .set noreorder - .align 4 - - .globl __clear_user - .ent __clear_user - .frame $30, 0, $26 - .prologue 0 - - # Pipeline info : Slotting & Comments -__clear_user: - and $17, $17, $0 - and $16, 7, $4 # .. E .. .. : find dest head misalignment - beq $0, $zerolength # U .. .. .. : U L U L - - addq $0, $4, $1 # .. .. .. E : bias counter - and $1, 7, $2 # .. .. E .. : number of misaligned bytes in tail -# Note - we never actually use $2, so this is a moot computation -# and we can rewrite this later... - srl $1, 3, $1 # .. E .. .. : number of quadwords to clear - beq $4, $headalign # U .. .. .. : U L U L - -/* - * Head is not aligned. Write (8 - $4) bytes to head of destination - * This means $16 is known to be misaligned - */ - EX( ldq_u $5, 0($16) ) # .. .. .. L : load dst word to mask back in - beq $1, $onebyte # .. .. U .. : sub-word store? - mskql $5, $16, $5 # .. U .. .. : take care of misaligned head - addq $16, 8, $16 # E .. .. .. : L U U L - - EX( stq_u $5, -8($16) ) # .. .. .. L : - subq $1, 1, $1 # .. .. E .. : - addq $0, $4, $0 # .. E .. .. : bytes left -= 8 - misalignment - subq $0, 8, $0 # E .. .. .. : U L U L - - .align 4 -/* - * (The .align directive ought to be a moot point) - * values upon initial entry to the loop - * $1 is number of quadwords to clear (zero is a valid value) - * $2 is number of trailing bytes (0..7) ($2 never used...) - * $16 is known to be aligned 0mod8 - */ -$headalign: - subq $1, 16, $4 # .. .. .. E : If < 16, we can not use the huge loop - and $16, 0x3f, $2 # .. .. E .. : Forward work for huge loop - subq $2, 0x40, $3 # .. E .. .. : bias counter (huge loop) - blt $4, $trailquad # U .. .. .. : U L U L - -/* - * We know that we're going to do at least 16 quads, which means we are - * going to be able to use the large block clear loop at least once. - * Figure out how many quads we need to clear before we are 0mod64 aligned - * so we can use the wh64 instruction. - */ - - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $3, $bigalign # U .. .. .. : U L U L : Aligned 0mod64 - -$alignmod64: - EX( stq_u $31, 0($16) ) # .. .. .. L - addq $3, 8, $3 # .. .. E .. - subq $0, 8, $0 # .. E .. .. - nop # E .. .. .. : U L U L - - nop # .. .. .. E - subq $1, 1, $1 # .. .. E .. - addq $16, 8, $16 # .. E .. .. - blt $3, $alignmod64 # U .. .. .. : U L U L - -$bigalign: -/* - * $0 is the number of bytes left - * $1 is the number of quads left - * $16 is aligned 0mod64 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * We are _not_ going to update $0 after every single store. That - * would be silly, because there will be cross-cluster dependencies - * no matter how the code is scheduled. By doing it in slightly - * staggered fashion, we can still do this loop in 5 fetches - * The worse case will be doing two extra quads in some future execution, - * in the event of an interrupted clear. - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - nop # E : - nop # E : - nop # E : - bis $16,$16,$3 # E : U L U L : Initial wh64 address is dest - /* This might actually help for the current trip... */ - -$do_wh64: - wh64 ($3) # .. .. .. 
L1 : memory subsystem hint - subq $1, 16, $4 # .. .. E .. : Forward calculation - repeat the loop? - EX( stq_u $31, 0($16) ) # .. L .. .. - subq $0, 8, $0 # E .. .. .. : U L U L - - addq $16, 128, $3 # E : Target address of wh64 - EX( stq_u $31, 8($16) ) # L : - EX( stq_u $31, 16($16) ) # L : - subq $0, 16, $0 # E : U L L U - - nop # E : - EX( stq_u $31, 24($16) ) # L : - EX( stq_u $31, 32($16) ) # L : - subq $0, 168, $5 # E : U L L U : two trips through the loop left? - /* 168 = 192 - 24, since we've already completed some stores */ - - subq $0, 16, $0 # E : - EX( stq_u $31, 40($16) ) # L : - EX( stq_u $31, 48($16) ) # L : - cmovlt $5, $16, $3 # E : U L L U : Latency 2, extra mapping cycle - - subq $1, 8, $1 # E : - subq $0, 16, $0 # E : - EX( stq_u $31, 56($16) ) # L : - nop # E : U L U L - - nop # E : - subq $0, 8, $0 # E : - addq $16, 64, $16 # E : - bge $4, $do_wh64 # U : U L U L - -$trailquad: - # zero to 16 quadwords left to store, plus any trailing bytes - # $1 is the number of quadwords left to go. - # - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $1, $trailbytes # U .. .. .. : U L U L : Only 0..7 bytes to go - -$onequad: - EX( stq_u $31, 0($16) ) # .. .. .. L - subq $1, 1, $1 # .. .. E .. - subq $0, 8, $0 # .. E .. .. - nop # E .. .. .. : U L U L - - nop # .. .. .. E - nop # .. .. E .. - addq $16, 8, $16 # .. E .. .. - bgt $1, $onequad # U .. .. .. : U L U L - - # We have an unknown number of bytes left to go. -$trailbytes: - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $0, $zerolength # U .. .. .. : U L U L - - # $0 contains the number of bytes left to copy (0..31) - # so we will use $0 as the loop counter - # We know for a fact that $0 > 0 zero due to previous context -$onebyte: - EX( stb $31, 0($16) ) # .. .. .. L - subq $0, 1, $0 # .. .. E .. : - addq $16, 1, $16 # .. E .. .. : - bgt $0, $onebyte # U .. .. .. : U L U L - -$zerolength: -$exception: # Destination for exception recovery(?) - nop # .. .. .. E : - nop # .. .. E .. : - nop # .. E .. .. : - ret $31, ($26), 1 # L0 .. .. .. : L U L U - .end __clear_user - EXPORT_SYMBOL(__clear_user) diff --git a/arch/alpha/lib/ev6-copy_page.S b/arch/alpha/lib/ev6-copy_page.S deleted file mode 100644 index fd7212c8dcf1848533ed8dc07c2f19c6a7aa673e..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-copy_page.S +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-copy_page.S - * - * Copy an entire page. - */ - -/* The following comparison of this routine vs the normal copy_page.S - was written by an unnamed ev6 hardware designer and forwarded to me - via Steven Hobbs . - - First Problem: STQ overflows. - ----------------------------- - - It would be nice if EV6 handled every resource overflow efficiently, - but for some it doesn't. Including store queue overflows. It causes - a trap and a restart of the pipe. - - To get around this we sometimes use (to borrow a term from a VSSAD - researcher) "aeration". The idea is to slow the rate at which the - processor receives valid instructions by inserting nops in the fetch - path. In doing so, you can prevent the overflow and actually make - the code run faster. You can, of course, take advantage of the fact - that the processor can fetch at most 4 aligned instructions per cycle. - - I inserted enough nops to force it to take 10 cycles to fetch the - loop code. 
In theory, EV6 should be able to execute this loop in - 9 cycles but I was not able to get it to run that fast -- the initial - conditions were such that I could not reach this optimum rate on - (chaotic) EV6. I wrote the code such that everything would issue - in order. - - Second Problem: Dcache index matches. - ------------------------------------- - - If you are going to use this routine on random aligned pages, there - is a 25% chance that the pages will be at the same dcache indices. - This results in many nasty memory traps without care. - - The solution is to schedule the prefetches to avoid the memory - conflicts. I schedule the wh64 prefetches farther ahead of the - read prefetches to avoid this problem. - - Third Problem: Needs more prefetching. - -------------------------------------- - - In order to improve the code I added deeper prefetching to take the - most advantage of EV6's bandwidth. - - I also prefetched the read stream. Note that adding the read prefetch - forced me to add another cycle to the inner-most kernel - up to 11 - from the original 8 cycles per iteration. We could improve performance - further by unrolling the loop and doing multiple prefetches per cycle. - - I think that the code below will be very robust and fast code for the - purposes of copying aligned pages. It is slower when both source and - destination pages are in the dcache, but it is my guess that this is - less important than the dcache miss case. */ - -#include - .text - .align 4 - .global copy_page - .ent copy_page -copy_page: - .prologue 0 - - /* Prefetch 5 read cachelines; write-hint 10 cache lines. */ - wh64 ($16) - ldl $31,0($17) - ldl $31,64($17) - lda $1,1*64($16) - - wh64 ($1) - ldl $31,128($17) - ldl $31,192($17) - lda $1,2*64($16) - - wh64 ($1) - ldl $31,256($17) - lda $18,118 - lda $1,3*64($16) - - wh64 ($1) - nop - lda $1,4*64($16) - lda $2,5*64($16) - - wh64 ($1) - wh64 ($2) - lda $1,6*64($16) - lda $2,7*64($16) - - wh64 ($1) - wh64 ($2) - lda $1,8*64($16) - lda $2,9*64($16) - - wh64 ($1) - wh64 ($2) - lda $19,10*64($16) - nop - - /* Main prefetching/write-hinting loop. */ -1: ldq $0,0($17) - ldq $1,8($17) - unop - unop - - unop - unop - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - unop - unop - - unop - unop - ldq $6,48($17) - ldq $7,56($17) - - ldl $31,320($17) - unop - unop - unop - - /* This gives the extra cycle of aeration above the minimum. */ - unop - unop - unop - unop - - wh64 ($19) - unop - unop - unop - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - unop - - unop - stq $2,16($16) - addq $17,64,$17 - stq $3,24($16) - - stq $4,32($16) - stq $5,40($16) - addq $19,64,$19 - unop - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 1b - - /* Prefetch the final 5 cache lines of the read stream. */ - lda $18,10 - ldl $31,320($17) - ldl $31,384($17) - ldl $31,448($17) - - ldl $31,512($17) - ldl $31,576($17) - nop - nop - - /* Non-prefetching, non-write-hinting cleanup loop for the - final 10 cache lines. 
*/ -2: ldq $0,0($17) - ldq $1,8($17) - ldq $2,16($17) - ldq $3,24($17) - - ldq $4,32($17) - ldq $5,40($17) - ldq $6,48($17) - ldq $7,56($17) - - stq $0,0($16) - subq $18,1,$18 - stq $1,8($16) - addq $17,64,$17 - - stq $2,16($16) - stq $3,24($16) - stq $4,32($16) - stq $5,40($16) - - stq $6,48($16) - stq $7,56($16) - addq $16,64,$16 - bne $18, 2b - - ret - nop - unop - nop - - .end copy_page - EXPORT_SYMBOL(copy_page) diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S deleted file mode 100644 index f3e43375439743c54a2e58b2e7928339f50642bb..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-copy_user.S +++ /dev/null @@ -1,227 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-copy_user.S - * - * 21264 version contributed by Rick Gorton - * - * Copy to/from user space, handling exceptions as we go.. This - * isn't exactly pretty. - * - * This is essentially the same as "memcpy()", but with a few twists. - * Notably, we have to make sure that $0 is always up-to-date and - * contains the right "bytes left to copy" value (and that it is updated - * only _after_ a successful copy). There is also some rather minor - * exception setup stuff.. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ - -#include -/* Allow an exception for an insn; exit if we get one. */ -#define EXI(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitin-99b($31); \ - .previous - -#define EXO(x,y...) \ - 99: x,##y; \ - .section __ex_table,"a"; \ - .long 99b - .; \ - lda $31, $exitout-99b($31); \ - .previous - - .set noat - .align 4 - .globl __copy_user - .ent __copy_user - # Pipeline info: Slotting & Comments -__copy_user: - .prologue 0 - mov $18, $0 # .. .. .. E - subq $18, 32, $1 # .. .. E. .. : Is this going to be a small copy? - nop # .. E .. .. - beq $18, $zerolength # U .. .. .. : U L U L - - and $16,7,$3 # .. .. .. E : is leading dest misalignment - ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data - beq $3, $destaligned # .. U .. .. : 2nd (one cycle fetcher stall) - subq $3, 8, $3 # E .. .. .. : L U U L : trip counter -/* - * The fetcher stall also hides the 1 cycle cross-cluster stall for $3 (L --> U) - * This loop aligns the destination a byte at a time - * We know we have at least one trip through this loop - */ -$aligndest: - EXI( ldbu $1,0($17) ) # .. .. .. L : Keep loads separate from stores - addq $16,1,$16 # .. .. E .. : Section 3.8 in the CWG - addq $3,1,$3 # .. E .. .. : - nop # E .. .. .. : U L U L - -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO( stb $1,-1($16) ) # .. .. .. L : - addq $17,1,$17 # .. .. E .. : Section 3.8 in the CWG - subq $0,1,$0 # .. E .. .. : - bne $3, $aligndest # U .. .. .. : U L U L - -/* - * If we fell through into here, we have a minimum of 33 - 7 bytes - * If we arrived via branch, we have a minimum of 32 bytes - */ -$destaligned: - and $17,7,$1 # .. .. .. E : Check _current_ source alignment - bic $0,7,$4 # .. .. E .. 
: number bytes as a quadword loop - EXI( ldq_u $3,0($17) ) # .. L .. .. : Forward fetch for fallthrough code - beq $1,$quadaligned # U .. .. .. : U L U L - -/* - * In the worst case, we've just executed an ldq_u here from 0($17) - * and we'll repeat it once if we take the branch - */ - -/* Misaligned quadword loop - not unrolled. Leave it that way. */ -$misquad: - EXI( ldq_u $2,8($17) ) # .. .. .. L : - subq $4,8,$4 # .. .. E .. : - extql $3,$17,$3 # .. U .. .. : - extqh $2,$17,$1 # U .. .. .. : U U L L - - bis $3,$1,$1 # .. .. .. E : - EXO( stq $1,0($16) ) # .. .. L .. : - addq $17,8,$17 # .. E .. .. : - subq $0,8,$0 # E .. .. .. : U L L U - - addq $16,8,$16 # .. .. .. E : - bis $2,$2,$3 # .. .. E .. : - nop # .. E .. .. : - bne $4,$misquad # U .. .. .. : U L U L - - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - beq $0,$zerolength # U .. .. .. : U L U L - -/* We know we have at least one trip through the byte loop */ - EXI ( ldbu $2,0($17) ) # .. .. .. L : No loads in the same quad - addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) - nop # .. E .. .. : - br $31, $dirtyentry # L0 .. .. .. : L U U L -/* Do the trailing byte loop load, then hop into the store part of the loop */ - -/* - * A minimum of (33 - 7) bytes to do a quad at a time. - * Based upon the usage context, it's worth the effort to unroll this loop - * $0 - number of bytes to be moved - * $4 - number of bytes to move as quadwords - * $16 is current destination address - * $17 is current source address - */ -$quadaligned: - subq $4, 32, $2 # .. .. .. E : do not unroll for small stuff - nop # .. .. E .. - nop # .. E .. .. - blt $2, $onequad # U .. .. .. : U L U L - -/* - * There is a significant assumption here that the source and destination - * addresses differ by more than 32 bytes. In this particular case, a - * sparsity of registers further bounds this to be a minimum of 8 bytes. - * But if this isn't met, then the output result will be incorrect. - * Furthermore, due to a lack of available registers, we really can't - * unroll this to be an 8x loop (which would enable us to use the wh64 - * instruction memory hint instruction). - */ -$unroll4: - EXI( ldq $1,0($17) ) # .. .. .. L - EXI( ldq $2,8($17) ) # .. .. L .. - subq $4,32,$4 # .. E .. .. - nop # E .. .. .. : U U L L - - addq $17,16,$17 # .. .. .. E - EXO( stq $1,0($16) ) # .. .. L .. - EXO( stq $2,8($16) ) # .. L .. .. - subq $0,16,$0 # E .. .. .. : U L L U - - addq $16,16,$16 # .. .. .. E - EXI( ldq $1,0($17) ) # .. .. L .. - EXI( ldq $2,8($17) ) # .. L .. .. - subq $4, 32, $3 # E .. .. .. : U U L L : is there enough for another trip? - - EXO( stq $1,0($16) ) # .. .. .. L - EXO( stq $2,8($16) ) # .. .. L .. - subq $0,16,$0 # .. E .. .. - addq $17,16,$17 # E .. .. .. : U L L U - - nop # .. .. .. E - nop # .. .. E .. - addq $16,16,$16 # .. E .. .. - bgt $3,$unroll4 # U .. .. .. : U L U L - - nop - nop - nop - beq $4, $noquads - -$onequad: - EXI( ldq $1,0($17) ) - subq $4,8,$4 - addq $17,8,$17 - nop - - EXO( stq $1,0($16) ) - subq $0,8,$0 - addq $16,8,$16 - bne $4,$onequad - -$noquads: - nop - nop - nop - beq $0,$zerolength - -/* - * For small copies (or the tail of a larger copy), do a very simple byte loop. - * There's no point in doing a lot of complex alignment calculations to try to - * to quadword stuff for a small amount of data. - * $0 - remaining number of bytes left to copy - * $16 - current dest addr - * $17 - current source addr - */ - -$onebyteloop: - EXI ( ldbu $2,0($17) ) # .. .. .. 
L : No loads in the same quad - addq $16,1,$16 # .. .. E .. : as the store (Section 3.8 in CWG) - nop # .. E .. .. : - nop # E .. .. .. : U L U L - -$dirtyentry: -/* - * the -1 is to compensate for the inc($16) done in a previous quadpack - * which allows us zero dependencies within either quadpack in the loop - */ - EXO ( stb $2,-1($16) ) # .. .. .. L : - addq $17,1,$17 # .. .. E .. : quadpack as the load - subq $0,1,$0 # .. E .. .. : change count _after_ copy - bgt $0,$onebyteloop # U .. .. .. : U L U L - -$zerolength: -$exitin: -$exitout: # Destination for exception recovery(?) - nop # .. .. .. E - nop # .. .. E .. - nop # .. E .. .. - ret $31,($26),1 # L0 .. .. .. : L U L U - - .end __copy_user - EXPORT_SYMBOL(__copy_user) diff --git a/arch/alpha/lib/ev6-csum_ipv6_magic.S b/arch/alpha/lib/ev6-csum_ipv6_magic.S deleted file mode 100644 index 9a73f90700a13a63cb3929d9ba3db7daab0250fc..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-csum_ipv6_magic.S +++ /dev/null @@ -1,153 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-csum_ipv6_magic.S - * 21264 version contributed by Rick Gorton - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Determining actual stalls (other than slotting) doesn't appear to be easy to do. - * - * unsigned short csum_ipv6_magic(struct in6_addr *saddr, - * struct in6_addr *daddr, - * __u32 len, - * unsigned short proto, - * unsigned int csum); - * - * Swap (takes form 0xaabb) - * Then shift it left by 48, so result is: - * 0xbbaa0000 00000000 - * Then turn it back into a sign extended 32-bit item - * 0xbbaa0000 - * - * Swap (an unsigned int) using Mike Burrows' 7-instruction sequence - * (we can't hide the 3-cycle latency of the unpkbw in the 6-instruction sequence) - * Assume input takes form 0xAABBCCDD - * - * Finally, original 'folding' approach is to split the long into 4 unsigned shorts - * add 4 ushorts, resulting in ushort/carry - * add carry bits + ushort --> ushort - * add carry bits + ushort --> ushort (in case the carry results in an overflow) - * Truncate to a ushort. (took 13 instructions) - * From doing some testing, using the approach in checksum.c:from64to16() - * results in the same outcome: - * split into 2 uints, add those, generating a ulong - * add the 3 low ushorts together, generating a uint - * a final add of the 2 lower ushorts - * truncating the result. - * - * Misalignment handling added by Ivan Kokshaysky - * The cost is 16 instructions (~8 cycles), including two extra loads which - * may cause additional delay in rare cases (load-load replay traps). 
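
(Aside: the from64to16()-style folding the comment refers to is compact in C. This is a portable sketch of exactly those steps, not the kernel's code.)

    #include <stdint.h>

    static uint16_t from64to16_sketch(uint64_t x)
    {
        /* split into 2 uints, add those: at most 33 significant bits left */
        uint64_t t = (uint32_t)x + (x >> 32);
        /* add the 3 low ushorts together: at most 18 significant bits left */
        uint64_t u = (t & 0xffff) + ((t >> 16) & 0xffff) + (t >> 32);
        /* a final add of the 2 lower ushorts, truncating the result */
        return (uint16_t)((u & 0xffff) + (u >> 16));
    }
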
- */ - -#include - .globl csum_ipv6_magic - .align 4 - .ent csum_ipv6_magic - .frame $30,0,$26,0 -csum_ipv6_magic: - .prologue 0 - - ldq_u $0,0($16) # L : Latency: 3 - inslh $18,7,$4 # U : 0000000000AABBCC - ldq_u $1,8($16) # L : Latency: 3 - sll $19,8,$7 # U : U L U L : 0x00000000 00aabb00 - - and $16,7,$6 # E : src misalignment - ldq_u $5,15($16) # L : Latency: 3 - zapnot $20,15,$20 # U : zero extend incoming csum - ldq_u $2,0($17) # L : U L U L : Latency: 3 - - extql $0,$6,$0 # U : - extqh $1,$6,$22 # U : - ldq_u $3,8($17) # L : Latency: 3 - sll $19,24,$19 # U : U U L U : 0x000000aa bb000000 - - cmoveq $6,$31,$22 # E : src aligned? - ldq_u $23,15($17) # L : Latency: 3 - inswl $18,3,$18 # U : 000000CCDD000000 - addl $19,$7,$19 # E : U L U L : bbaabb00 - - or $0,$22,$0 # E : 1st src word complete - extql $1,$6,$1 # U : - or $18,$4,$18 # E : 000000CCDDAABBCC - extqh $5,$6,$5 # U : L U L U - - and $17,7,$6 # E : dst misalignment - extql $2,$6,$2 # U : - or $1,$5,$1 # E : 2nd src word complete - extqh $3,$6,$22 # U : L U L U : - - cmoveq $6,$31,$22 # E : dst aligned? - extql $3,$6,$3 # U : - addq $20,$0,$20 # E : begin summing the words - extqh $23,$6,$23 # U : L U L U : - - srl $18,16,$4 # U : 0000000000CCDDAA - or $2,$22,$2 # E : 1st dst word complete - zap $19,0x3,$19 # U : bbaa0000 - or $3,$23,$3 # E : U L U L : 2nd dst word complete - - cmpult $20,$0,$0 # E : - addq $20,$1,$20 # E : - zapnot $18,0xa,$18 # U : 00000000DD00BB00 - zap $4,0xa,$4 # U : U U L L : 0000000000CC00AA - - or $18,$4,$18 # E : 00000000DDCCBBAA - nop # E : - cmpult $20,$1,$1 # E : - addq $20,$2,$20 # E : U L U L - - cmpult $20,$2,$2 # E : - addq $20,$3,$20 # E : - cmpult $20,$3,$3 # E : (1 cycle stall on $20) - addq $20,$18,$20 # E : U L U L (1 cycle stall on $20) - - cmpult $20,$18,$18 # E : - addq $20,$19,$20 # E : (1 cycle stall on $20) - addq $0,$1,$0 # E : merge the carries back into the csum - addq $2,$3,$2 # E : - - cmpult $20,$19,$19 # E : - addq $18,$19,$18 # E : (1 cycle stall on $19) - addq $0,$2,$0 # E : - addq $20,$18,$20 # E : U L U L : - /* (1 cycle stall on $18, 2 cycles on $20) */ - - addq $0,$20,$0 # E : - zapnot $0,15,$1 # U : Start folding output (1 cycle stall on $0) - nop # E : - srl $0,32,$0 # U : U L U L : (1 cycle stall on $0) - - addq $1,$0,$1 # E : Finished generating ulong - extwl $1,2,$2 # U : ushort[1] (1 cycle stall on $1) - zapnot $1,3,$0 # U : ushort[0] (1 cycle stall on $1) - extwl $1,4,$1 # U : ushort[2] (1 cycle stall on $1) - - addq $0,$2,$0 # E - addq $0,$1,$3 # E : Finished generating uint - /* (1 cycle stall on $0) */ - extwl $3,2,$1 # U : ushort[1] (1 cycle stall on $3) - nop # E : L U L U - - addq $1,$3,$0 # E : Final carry - not $0,$4 # E : complement (1 cycle stall on $0) - zapnot $4,3,$0 # U : clear upper garbage bits - /* (1 cycle stall on $4) */ - ret # L0 : L U L U - - .end csum_ipv6_magic - EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/alpha/lib/ev6-divide.S b/arch/alpha/lib/ev6-divide.S deleted file mode 100644 index 137ff1a07356311d598cd8c254cb8214c8707ec7..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-divide.S +++ /dev/null @@ -1,263 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-divide.S - * - * 21264 version contributed by Rick Gorton - * - * Alpha division.. - */ - -/* - * The alpha chip doesn't provide hardware division, so we have to do it - * by hand. 
The compiler expects the functions - * - * __divqu: 64-bit unsigned long divide - * __remqu: 64-bit unsigned long remainder - * __divqs/__remqs: signed 64-bit - * __divlu/__remlu: unsigned 32-bit - * __divls/__remls: signed 32-bit - * - * These are not normal C functions: instead of the normal - * calling sequence, these expect their arguments in registers - * $24 and $25, and return the result in $27. Register $28 may - * be clobbered (assembly temporary), anything else must be saved. - * - * In short: painful. - * - * This is a rather simple bit-at-a-time algorithm: it's very good - * at dividing random 64-bit numbers, but the more usual case where - * the divisor is small is handled better by the DEC algorithm - * using lookup tables. This uses much less memory, though, and is - * nicer on the cache.. Besides, I don't know the copyright status - * of the DEC code. - */ - -/* - * My temporaries: - * $0 - current bit - * $1 - shifted divisor - * $2 - modulus/quotient - * - * $23 - return address - * $24 - dividend - * $25 - divisor - * - * $27 - quotient/modulus - * $28 - compare status - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include -#define halt .long 0 - -/* - * Select function type and registers - */ -#define mask $0 -#define divisor $1 -#define compare $28 -#define tmp1 $3 -#define tmp2 $4 - -#ifdef DIV -#define DIV_ONLY(x,y...) x,##y -#define MOD_ONLY(x,y...) -#define func(x) __div##x -#define modulus $2 -#define quotient $27 -#define GETSIGN(x) xor $24,$25,x -#define STACK 48 -#else -#define DIV_ONLY(x,y...) -#define MOD_ONLY(x,y...) x,##y -#define func(x) __rem##x -#define modulus $27 -#define quotient $2 -#define GETSIGN(x) bis $24,$24,x -#define STACK 32 -#endif - -/* - * For 32-bit operations, we need to extend to 64-bit - */ -#ifdef INTSIZE -#define ufunction func(lu) -#define sfunction func(l) -#define LONGIFY(x) zapnot x,15,x -#define SLONGIFY(x) addl x,0,x -#else -#define ufunction func(qu) -#define sfunction func(q) -#define LONGIFY(x) -#define SLONGIFY(x) -#endif - -.set noat -.align 4 -.globl ufunction -.ent ufunction -ufunction: - subq $30,STACK,$30 # E : - .frame $30,STACK,$23 - .prologue 0 - -7: stq $1, 0($30) # L : - bis $25,$25,divisor # E : - stq $2, 8($30) # L : L U L U - - bis $24,$24,modulus # E : - stq $0,16($30) # L : - bis $31,$31,quotient # E : - LONGIFY(divisor) # E : U L L U - - stq tmp1,24($30) # L : - LONGIFY(modulus) # E : - bis $31,1,mask # E : - DIV_ONLY(stq tmp2,32($30)) # L : L U U L - - beq divisor, 9f /* div by zero */ - /* - * In spite of the DIV_ONLY being either a non-instruction - * or an actual stq, the addition of the .align directive - * below ensures that label 1 is going to be nicely aligned - */ - - .align 4 -#ifdef INTSIZE - /* - * shift divisor left, using 3-bit shifts for - * 32-bit divides as we can't overflow. Three-bit - * shifts will result in looping three times less - * here, but can result in two loops more later. - * Thus using a large shift isn't worth it (and - * s8add pairs better than a sll..) 
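
(Aside: the trade-off reads more clearly in C. A loose sketch of the 32-bit normalization step, with illustrative names; the divisor is known non-zero by this point in the asm.)

    #include <stdint.h>

    /* 32-bit operands are zero-extended into 64-bit registers, so scaling
     * the divisor by 8 per iteration (one s8addq) can never overflow the
     * register; the price is that the divisor may overshoot the dividend
     * by up to 8x, costing a couple of extra trips when shifting back. */
    static void normalize_by_8(uint64_t *divisor, uint64_t *mask,
                               uint64_t dividend)
    {
        while (*divisor < dividend) {
            *divisor <<= 3;
            *mask    <<= 3;
        }
    }
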
- */ -1: cmpult divisor,modulus,compare # E : - s8addq divisor,$31,divisor # E : - s8addq mask,$31,mask # E : - bne compare,1b # U : U L U L -#else -1: cmpult divisor,modulus,compare # E : - nop # E : - nop # E : - blt divisor, 2f # U : U L U L - - addq divisor,divisor,divisor # E : - addq mask,mask,mask # E : - unop # E : - bne compare,1b # U : U L U L -#endif - - /* ok, start to go right again.. */ -2: - /* - * Keep things nicely bundled... use a nop instead of not - * having an instruction for DIV_ONLY - */ -#ifdef DIV - DIV_ONLY(addq quotient,mask,tmp2) # E : -#else - nop # E : -#endif - srl mask,1,mask # U : - cmpule divisor,modulus,compare # E : - subq modulus,divisor,tmp1 # E : - -#ifdef DIV - DIV_ONLY(cmovne compare,tmp2,quotient) # E : Latency 2, extra map slot - nop # E : as part of the cmovne - srl divisor,1,divisor # U : - nop # E : L U L U - - nop # E : - cmovne compare,tmp1,modulus # E : Latency 2, extra map slot - nop # E : as part of the cmovne - bne mask,2b # U : U L U L -#else - srl divisor,1,divisor # U : - cmovne compare,tmp1,modulus # E : Latency 2, extra map slot - nop # E : as part of the cmovne - bne mask,2b # U : U L L U -#endif - -9: ldq $1, 0($30) # L : - ldq $2, 8($30) # L : - nop # E : - nop # E : U U L L - - ldq $0,16($30) # L : - ldq tmp1,24($30) # L : - nop # E : - nop # E : - -#ifdef DIV - DIV_ONLY(ldq tmp2,32($30)) # L : -#else - nop # E : -#endif - addq $30,STACK,$30 # E : - ret $31,($23),1 # L0 : L U U L - .end ufunction -EXPORT_SYMBOL(ufunction) - -/* - * Uhh.. Ugly signed division. I'd rather not have it at all, but - * it's needed in some circumstances. There are different ways to - * handle this, really. This does: - * -a / b = a / -b = -(a / b) - * -a % b = -(a % b) - * a % -b = a % b - * which is probably not the best solution, but at least should - * have the property that (x/y)*y + (x%y) = x. - */ -.align 4 -.globl sfunction -.ent sfunction -sfunction: - subq $30,STACK,$30 # E : - .frame $30,STACK,$23 - .prologue 0 - bis $24,$25,$28 # E : - SLONGIFY($28) # E : - bge $28,7b # U : - - stq $24,0($30) # L : - subq $31,$24,$28 # E : - stq $25,8($30) # L : - nop # E : U L U L - - cmovlt $24,$28,$24 /* abs($24) */ # E : Latency 2, extra map slot - nop # E : as part of the cmov - stq $23,16($30) # L : - subq $31,$25,$28 # E : U L U L - - stq tmp1,24($30) # L : - cmovlt $25,$28,$25 /* abs($25) */ # E : Latency 2, extra map slot - nop # E : - bsr $23,ufunction # L0: L U L U - - ldq $24,0($30) # L : - ldq $25,8($30) # L : - GETSIGN($28) # E : - subq $31,$27,tmp1 # E : U U L L - - SLONGIFY($28) # E : - ldq $23,16($30) # L : - cmovlt $28,tmp1,$27 # E : Latency 2, extra map slot - nop # E : U L L U : as part of the cmov - - ldq tmp1,24($30) # L : - nop # E : as part of the cmov - addq $30,STACK,$30 # E : - ret $31,($23),1 # L0 : L U U L - .end sfunction -EXPORT_SYMBOL(sfunction) diff --git a/arch/alpha/lib/ev6-memchr.S b/arch/alpha/lib/ev6-memchr.S deleted file mode 100644 index 56bf9e14eeeefadf510cbe4d52fba27c0f1f5701..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memchr.S +++ /dev/null @@ -1,193 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memchr.S - * - * 21264 version contributed by Rick Gorton - * - * Finds characters in a memory area. 
Optimized for the Alpha: - * - * - memory accessed as aligned quadwords only - * - uses cmpbge to compare 8 bytes in parallel - * - does binary search to find 0 byte in last - * quadword (HAKMEM needed 12 instructions to - * do this instead of the 9 instructions that - * binary search needs). - * - * For correctness consider that: - * - * - only minimum number of quadwords may be accessed - * - the third argument is an unsigned long - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ -#include - .set noreorder - .set noat - - .align 4 - .globl memchr - .ent memchr -memchr: - .frame $30,0,$26,0 - .prologue 0 - - # Hack -- if someone passes in (size_t)-1, hoping to just - # search til the end of the address space, we will overflow - # below when we find the address of the last byte. Given - # that we will never have a 56-bit address space, cropping - # the length is the easiest way to avoid trouble. - zap $18, 0x80, $5 # U : Bound length - beq $18, $not_found # U : - ldq_u $1, 0($16) # L : load first quadword Latency=3 - and $17, 0xff, $17 # E : L L U U : 00000000000000ch - - insbl $17, 1, $2 # U : 000000000000ch00 - cmpult $18, 9, $4 # E : small (< 1 quad) string? - or $2, $17, $17 # E : 000000000000chch - lda $3, -1($31) # E : U L L U - - sll $17, 16, $2 # U : 00000000chch0000 - addq $16, $5, $5 # E : Max search address - or $2, $17, $17 # E : 00000000chchchch - sll $17, 32, $2 # U : U L L U : chchchch00000000 - - or $2, $17, $17 # E : chchchchchchchch - extql $1, $16, $7 # U : $7 is upper bits - beq $4, $first_quad # U : - ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3 - - extqh $6, $16, $6 # U : 2 cycle stall for $6 - mov $16, $0 # E : - nop # E : - or $7, $6, $1 # E : L U L U $1 = quadword starting at $16 - - # Deal with the case where at most 8 bytes remain to be searched - # in $1. E.g.: - # $18 = 6 - # $1 = ????c6c5c4c3c2c1 -$last_quad: - negq $18, $6 # E : - xor $17, $1, $1 # E : - srl $3, $6, $6 # U : $6 = mask of $18 bits set - cmpbge $31, $1, $2 # E : L U L U - - nop - nop - and $2, $6, $2 # E : - beq $2, $not_found # U : U L U L - -$found_it: -#ifdef CONFIG_ALPHA_EV67 - /* - * Since we are guaranteed to have set one of the bits, we don't - * have to worry about coming back with a 0x40 out of cttz... - */ - cttz $2, $3 # U0 : - addq $0, $3, $0 # E : All done - nop # E : - ret # L0 : L U L U -#else - /* - * Slow and clunky. It can probably be improved. - * An exercise left for others. - */ - negq $2, $3 # E : - and $2, $3, $2 # E : - and $2, 0x0f, $1 # E : - addq $0, 4, $3 # E : - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop # E : keep with cmov - and $2, 0x33, $1 # E : - addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0 - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop # E : keep with cmov - and $2, 0x55, $1 # E : - addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0 - - cmoveq $1, $3, $0 # E : Latency 2, extra map cycle - nop - nop - ret # L0 : L U L U -#endif - - # Deal with the case where $18 > 8 bytes remain to be - # searched. $16 may not be aligned. 
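
(Aside: the cmpbge-based search described in the header amounts to the classic SWAR zero-byte test. A portable sketch with illustrative names — the pattern is built here with a multiply rather than insbl/or, and the byte index is found with a simple scan where the asm uses a binary search, or cttz on EV67.)

    #include <stdint.h>

    /* Return the index (0..7) of the first byte of 'quad' equal to c,
       or -1 if none matches, mimicking one cmpbge step. */
    static int which_byte_matches(uint64_t quad, unsigned char c)
    {
        uint64_t pat = c * 0x0101010101010101ULL;  /* chchchchchchchch */
        uint64_t x = quad ^ pat;                   /* matching bytes -> 0x00 */
        uint64_t zero = (x - 0x0101010101010101ULL) & ~x
                        & 0x8080808080808080ULL;   /* 0x80 per zero byte */
        int i = 0;
        if (!zero)
            return -1;
        while (!(zero & 0xff)) {                   /* lowest set byte wins */
            zero >>= 8;
            i++;
        }
        return i;                                  /* little-endian order */
    }
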
- .align 4 -$first_quad: - andnot $16, 0x7, $0 # E : - insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff) - xor $1, $17, $1 # E : - or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff - - cmpbge $31, $1, $2 # E : - bne $2, $found_it # U : - # At least one byte left to process. - ldq $1, 8($0) # L : - subq $5, 1, $18 # E : U L U L - - addq $0, 8, $0 # E : - # Make $18 point to last quad to be accessed (the - # last quad may or may not be partial). - andnot $18, 0x7, $18 # E : - cmpult $0, $18, $2 # E : - beq $2, $final # U : U L U L - - # At least two quads remain to be accessed. - - subq $18, $0, $4 # E : $4 <- nr quads to be processed - and $4, 8, $4 # E : odd number of quads? - bne $4, $odd_quad_count # U : - # At least three quads remain to be accessed - mov $1, $4 # E : L U L U : move prefetched value to correct reg - - .align 4 -$unrolled_loop: - ldq $1, 8($0) # L : prefetch $1 - xor $17, $4, $2 # E : - cmpbge $31, $2, $2 # E : - bne $2, $found_it # U : U L U L - - addq $0, 8, $0 # E : - nop # E : - nop # E : - nop # E : - -$odd_quad_count: - xor $17, $1, $2 # E : - ldq $4, 8($0) # L : prefetch $4 - cmpbge $31, $2, $2 # E : - addq $0, 8, $6 # E : - - bne $2, $found_it # U : - cmpult $6, $18, $6 # E : - addq $0, 8, $0 # E : - nop # E : - - bne $6, $unrolled_loop # U : - mov $4, $1 # E : move prefetched value into $1 - nop # E : - nop # E : - -$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do - nop # E : - nop # E : - bne $18, $last_quad # U : - -$not_found: - mov $31, $0 # E : - nop # E : - nop # E : - ret # L0 : - - .end memchr - EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/ev6-memcpy.S b/arch/alpha/lib/ev6-memcpy.S deleted file mode 100644 index ffbd056b6eb2905d72d01b6f5bc65d9ac4a06340..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memcpy.S +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memcpy.S - * 21264 version by Rick Gorton - * - * Reasonably optimized memcpy() routine for the Alpha 21264 - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * - * Temp usage notes: - * $1,$2, - scratch - */ -#include - .set noreorder - .set noat - - .align 4 - .globl memcpy - .ent memcpy -memcpy: - .frame $30,0,$26,0 - .prologue 0 - - mov $16, $0 # E : copy dest to return - ble $18, $nomoredata # U : done with the copy? - xor $16, $17, $1 # E : are source and dest alignments the same? - and $1, 7, $1 # E : are they the same mod 8? - - bne $1, $misaligned # U : Nope - gotta do this the slow way - /* source and dest are same mod 8 address */ - and $16, 7, $1 # E : Are both 0mod8? - beq $1, $both_0mod8 # U : Yes - nop # E : - - /* - * source and dest are same misalignment. move a byte at a time - * until a 0mod8 alignment for both is reached. - * At least one byte more to move - */ - -$head_align: - ldbu $1, 0($17) # L : grab a byte - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - stb $1, 0($16) # L : - addq $16, 1, $16 # E : dest++ - and $16, 7, $1 # E : Are we at 0mod8 yet? 
- ble $18, $nomoredata # U : done with the copy? - bne $1, $head_align # U : - -$both_0mod8: - cmple $18, 127, $1 # E : Can we unroll the loop? - bne $1, $no_unroll # U : - and $16, 63, $1 # E : get mod64 alignment - beq $1, $do_unroll # U : no single quads to fiddle - -$single_head_quad: - ldq $1, 0($17) # L : get 8 bytes - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - nop # E : - - stq $1, 0($16) # L : store - addq $16, 8, $16 # E : dest += 8 - and $16, 63, $1 # E : get mod64 alignment - bne $1, $single_head_quad # U : still not fully aligned - -$do_unroll: - addq $16, 64, $7 # E : Initial (+1 trip) wh64 address - cmple $18, 127, $1 # E : Can we go through the unrolled loop? - bne $1, $tail_quads # U : Nope - nop # E : - -$unroll_body: - wh64 ($7) # L1 : memory subsystem hint: 64 bytes at - # ($7) are about to be over-written - ldq $6, 0($17) # L0 : bytes 0..7 - nop # E : - nop # E : - - ldq $4, 8($17) # L : bytes 8..15 - ldq $5, 16($17) # L : bytes 16..23 - addq $7, 64, $7 # E : Update next wh64 address - nop # E : - - ldq $3, 24($17) # L : bytes 24..31 - addq $16, 64, $1 # E : fallback value for wh64 - nop # E : - nop # E : - - addq $17, 32, $17 # E : src += 32 bytes - stq $6, 0($16) # L : bytes 0..7 - nop # E : - nop # E : - - stq $4, 8($16) # L : bytes 8..15 - stq $5, 16($16) # L : bytes 16..23 - subq $18, 192, $2 # E : At least two more trips to go? - nop # E : - - stq $3, 24($16) # L : bytes 24..31 - addq $16, 32, $16 # E : dest += 32 bytes - nop # E : - nop # E : - - ldq $6, 0($17) # L : bytes 0..7 - ldq $4, 8($17) # L : bytes 8..15 - cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use - # fallback wh64 address if < 2 more trips - nop # E : - - ldq $5, 16($17) # L : bytes 16..23 - ldq $3, 24($17) # L : bytes 24..31 - addq $16, 32, $16 # E : dest += 32 - subq $18, 64, $18 # E : count -= 64 - - addq $17, 32, $17 # E : src += 32 - stq $6, -32($16) # L : bytes 0..7 - stq $4, -24($16) # L : bytes 8..15 - cmple $18, 63, $1 # E : At least one more trip? - - stq $5, -16($16) # L : bytes 16..23 - stq $3, -8($16) # L : bytes 24..31 - nop # E : - beq $1, $unroll_body - -$tail_quads: -$no_unroll: - .align 4 - subq $18, 8, $18 # E : At least a quad left? - blt $18, $less_than_8 # U : Nope - nop # E : - nop # E : - -$move_a_quad: - ldq $1, 0($17) # L : fetch 8 - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - nop # E : - - stq $1, 0($16) # L : store 8 - addq $16, 8, $16 # E : dest += 8 - bge $18, $move_a_quad # U : - nop # E : - -$less_than_8: - .align 4 - addq $18, 8, $18 # E : add back for trailing bytes - ble $18, $nomoredata # U : All-done - nop # E : - nop # E : - - /* Trailing bytes */ -$tail_bytes: - subq $18, 1, $18 # E : count-- - ldbu $1, 0($17) # L : fetch a byte - addq $17, 1, $17 # E : src++ - nop # E : - - stb $1, 0($16) # L : store a byte - addq $16, 1, $16 # E : dest++ - bgt $18, $tail_bytes # U : more to be done? - nop # E : - - /* branching to exit takes 3 extra cycles, so replicate exit here */ - ret $31, ($26), 1 # L0 : - nop # E : - nop # E : - nop # E : - -$misaligned: - mov $0, $4 # E : dest temp - and $0, 7, $1 # E : dest alignment mod8 - beq $1, $dest_0mod8 # U : life doesnt totally suck - nop - -$aligndest: - ble $18, $nomoredata # U : - ldbu $1, 0($17) # L : fetch a byte - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - - stb $1, 0($4) # L : store it - addq $4, 1, $4 # E : dest++ - and $4, 7, $1 # E : dest 0mod8 yet? - bne $1, $aligndest # U : go until we are aligned. 
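
(Aside: the $mis_quad loop below is the standard Alpha unaligned-source technique. A rough portable C equivalent, assuming 64-bit little-endian like Alpha, with illustrative names.)

    #include <stdint.h>
    #include <stddef.h>

    /* Copy 'quads' aligned 8-byte stores to dst from an arbitrarily
       aligned src: keep one aligned quadword in hand, fetch the next,
       and splice the two (the extql/extqh merge) into each store. */
    static void misaligned_quads(uint64_t *dst, const unsigned char *src,
                                 size_t quads)
    {
        const uint64_t *p = (const uint64_t *)((uintptr_t)src & ~(uintptr_t)7);
        unsigned shift = ((uintptr_t)src & 7) * 8;
        uint64_t lo = *p++;                    /* seed (rotating) load: ldq_u */

        while (quads--) {
            uint64_t hi = *p++;                /* fetch next 8: ldq_u */
            *dst++ = shift ? (lo >> shift) | (hi << (64 - shift))
                           : lo;               /* extql | extqh merge */
            lo = hi;                           /* "rotate" the source data */
        }
    }
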
- - /* Source has unknown alignment, but dest is known to be 0mod8 */ -$dest_0mod8: - subq $18, 8, $18 # E : At least a quad left? - blt $18, $misalign_tail # U : Nope - ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes - nop # E : - -$mis_quad: - ldq_u $16, 8($17) # L : Fetch next 8 - extql $3, $17, $3 # U : masking - extqh $16, $17, $1 # U : masking - bis $3, $1, $1 # E : merged bytes to store - - subq $18, 8, $18 # E : count -= 8 - addq $17, 8, $17 # E : src += 8 - stq $1, 0($4) # L : store 8 (aligned) - mov $16, $3 # E : "rotate" source data - - addq $4, 8, $4 # E : dest += 8 - bge $18, $mis_quad # U : More quads to move - nop - nop - -$misalign_tail: - addq $18, 8, $18 # E : account for tail stuff - ble $18, $nomoredata # U : - nop - nop - -$misalign_byte: - ldbu $1, 0($17) # L : fetch 1 - subq $18, 1, $18 # E : count-- - addq $17, 1, $17 # E : src++ - nop # E : - - stb $1, 0($4) # L : store - addq $4, 1, $4 # E : dest++ - bgt $18, $misalign_byte # U : more to go? - nop - - -$nomoredata: - ret $31, ($26), 1 # L0 : - nop # E : - nop # E : - nop # E : - - .end memcpy - EXPORT_SYMBOL(memcpy) - -/* For backwards module compatibility. */ -__memcpy = memcpy -.globl __memcpy diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S deleted file mode 100644 index 1cfcfbbea6f068fcc69200bfd7a1d4177e1d6b75..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-memset.S +++ /dev/null @@ -1,605 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-memset.S - * - * This is an efficient (and relatively small) implementation of the C library - * "memset()" function for the 21264 implementation of Alpha. - * - * 21264 version contributed by Rick Gorton - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * The algorithm for the leading and trailing quadwords remains the same, - * however the loop has been unrolled to enable better memory throughput, - * and the code has been replicated for each of the entry points: __memset - * and __memset16 to permit better scheduling to eliminate the stalling - * encountered during the mask replication. - * A future enhancement might be to put in a byte store loop for really - * small (say < 32 bytes) memset()s. Whether or not that change would be - * a win in the kernel would depend upon the contextual usage. - * WARNING: Maintaining this is going to be more work than the above version, - * as fixes will need to be made in multiple places. The performance gain - * is worth it. - */ -#include - .set noat - .set noreorder -.text - .globl memset - .globl __memset - .globl ___memset - .globl __memset16 - .globl __constant_c_memset - - .ent ___memset -.align 5 -___memset: - .frame $30,0,$26,0 - .prologue 0 - - /* - * Serious stalling happens. The only way to mitigate this is to - * undertake a major re-write to interleave the constant materialization - * with other parts of the fall-through code. This is important, even - * though it makes maintenance tougher. - * Do this later. - */ - and $17,255,$1 # E : 00000000000000ch - insbl $17,1,$2 # U : 000000000000ch00 - bis $16,$16,$0 # E : return value - ble $18,end_b # U : zero length requested? 
- - addq $18,$16,$6 # E : max address to write to - bis $1,$2,$17 # E : 000000000000chch - insbl $1,2,$3 # U : 0000000000ch0000 - insbl $1,3,$4 # U : 00000000ch000000 - - or $3,$4,$3 # E : 00000000chch0000 - inswl $17,4,$5 # U : 0000chch00000000 - xor $16,$6,$1 # E : will complete write be within one quadword? - inswl $17,6,$2 # U : chch000000000000 - - or $17,$3,$17 # E : 00000000chchchch - or $2,$5,$2 # E : chchchch00000000 - bic $1,7,$1 # E : fit within a single quadword? - and $16,7,$3 # E : Target addr misalignment - - or $17,$2,$17 # E : chchchchchchchch - beq $1,within_quad_b # U : - nop # E : - beq $3,aligned_b # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned_b: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. - */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad_b # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop_b # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. - */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign_b # U : - -$alignmod64_b: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64_b # U : - -$bigalign_b: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. 
- */ - -$do_wh64_b: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64_b # U : - - nop - nop - nop - beq $3, no_quad_b # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop_b: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop_b # U : more? - -no_quad_b: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end_b # U : All done? - ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_quad_b: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end_b: - nop - nop - nop - ret $31,($26),1 # L0 : - .end ___memset - EXPORT_SYMBOL(___memset) - - /* - * This is the original body of code, prior to replication and - * rescheduling. Leave it here, as there may be calls to this - * entry point. - */ -.align 4 - .ent __constant_c_memset -__constant_c_memset: - .frame $30,0,$26,0 - .prologue 0 - - addq $18,$16,$6 # E : max address to write to - bis $16,$16,$0 # E : return value - xor $16,$6,$1 # E : will complete write be within one quadword? - ble $18,end # U : zero length requested? - - bic $1,7,$1 # E : fit within a single quadword - beq $1,within_one_quad # U : - and $16,7,$3 # E : Target addr misalignment - beq $3,aligned # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. 
- */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. - */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign # U : - -$alignmod64: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64 # U : - -$bigalign: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - -$do_wh64: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64 # U : - - nop - nop - nop - beq $3, no_quad # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop # U : more? - -no_quad: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end # U : All done? 
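
The ldq/mskqh/insqh/bis/stq sequence that follows is the standard read-modify-write for the final 1..7 bytes: the old quadword is loaded, its bytes at and above the end address are kept, the fill pattern supplies the bytes below, and the merged quadword is stored back. A C model of that merge (sketch only; q is the last, partial quadword, pat the replicated fill, n the trailing-byte count, little-endian assumed):

    #include <stdint.h>

    /* Overwrite only the low n (1..7) bytes of *q with the fill
     * pattern, preserving the bytes beyond the memset region. */
    static void store_trailing(uint64_t *q, uint64_t pat, unsigned n)
    {
        uint64_t keep = ~0UL << (8 * n);     /* bytes n..7 of old data */
        *q = (*q & keep) | (pat & ~keep);    /* mskqh/insqh, bis, stq */
    }

One wide read-modify-write keeps the tail at a fixed four-instruction cost regardless of n, instead of paying for a byte loop.
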
- ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_one_quad: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end: - nop - nop - nop - ret $31,($26),1 # L0 : - .end __constant_c_memset - EXPORT_SYMBOL(__constant_c_memset) - - /* - * This is a replicant of the __constant_c_memset code, rescheduled - * to mask stalls. Note that entry point names also had to change - */ - .align 5 - .ent __memset16 - -__memset16: - .frame $30,0,$26,0 - .prologue 0 - - inswl $17,0,$5 # U : 000000000000c1c2 - inswl $17,2,$2 # U : 00000000c1c20000 - bis $16,$16,$0 # E : return value - addq $18,$16,$6 # E : max address to write to - - ble $18, end_w # U : zero length requested? - inswl $17,4,$3 # U : 0000c1c200000000 - inswl $17,6,$4 # U : c1c2000000000000 - xor $16,$6,$1 # E : will complete write be within one quadword? - - or $2,$5,$2 # E : 00000000c1c2c1c2 - or $3,$4,$17 # E : c1c2c1c200000000 - bic $1,7,$1 # E : fit within a single quadword - and $16,7,$3 # E : Target addr misalignment - - or $17,$2,$17 # E : c1c2c1c2c1c2c1c2 - beq $1,within_quad_w # U : - nop - beq $3,aligned_w # U : target is 0mod8 - - /* - * Target address is misaligned, and won't fit within a quadword - */ - ldq_u $4,0($16) # L : Fetch first partial - bis $16,$16,$5 # E : Save the address - insql $17,$16,$2 # U : Insert new bytes - subq $3,8,$3 # E : Invert (for addressing uses) - - addq $18,$3,$18 # E : $18 is new count ($3 is negative) - mskql $4,$16,$4 # U : clear relevant parts of the quad - subq $16,$3,$16 # E : $16 is new aligned destination - bis $2,$4,$1 # E : Final bytes - - nop - stq_u $1,0($5) # L : Store result - nop - nop - -.align 4 -aligned_w: - /* - * We are now guaranteed to be quad aligned, with at least - * one partial quad to write. - */ - - sra $18,3,$3 # U : Number of remaining quads to write - and $18,7,$18 # E : Number of trailing bytes to write - bis $16,$16,$5 # E : Save dest address - beq $3,no_quad_w # U : tail stuff only - - /* - * it's worth the effort to unroll this and use wh64 if possible - * Lifted a bunch of code from clear_user.S - * At this point, entry values are: - * $16 Current destination address - * $5 A copy of $16 - * $6 The max quadword address to write to - * $18 Number trailer bytes - * $3 Number quads to write - */ - - and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop) - subq $3, 16, $4 # E : Only try to unroll if > 128 bytes - subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64) - blt $4, loop_w # U : - - /* - * We know we've got at least 16 quads, minimum of one trip - * through unrolled loop. Do a quad at a time to get us 0mod64 - * aligned. 
- */ - - nop # E : - nop # E : - nop # E : - beq $1, $bigalign_w # U : - -$alignmod64_w: - stq $17, 0($5) # L : - subq $3, 1, $3 # E : For consistency later - addq $1, 8, $1 # E : Increment towards zero for alignment - addq $5, 8, $4 # E : Initial wh64 address (filler instruction) - - nop - nop - addq $5, 8, $5 # E : Inc address - blt $1, $alignmod64_w # U : - -$bigalign_w: - /* - * $3 - number quads left to go - * $5 - target address (aligned 0mod64) - * $17 - mask of stuff to store - * Scratch registers available: $7, $2, $4, $1 - * we know that we'll be taking a minimum of one trip through - * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle - * Assumes the wh64 needs to be for 2 trips through the loop in the future - * The wh64 is issued on for the starting destination address for trip +2 - * through the loop, and if there are less than two trips left, the target - * address will be for the current trip. - */ - -$do_wh64_w: - wh64 ($4) # L1 : memory subsystem write hint - subq $3, 24, $2 # E : For determining future wh64 addresses - stq $17, 0($5) # L : - nop # E : - - addq $5, 128, $4 # E : speculative target of next wh64 - stq $17, 8($5) # L : - stq $17, 16($5) # L : - addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr) - - stq $17, 24($5) # L : - stq $17, 32($5) # L : - cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle - nop - - stq $17, 40($5) # L : - stq $17, 48($5) # L : - subq $3, 16, $2 # E : Repeat the loop at least once more? - nop - - stq $17, 56($5) # L : - addq $5, 64, $5 # E : - subq $3, 8, $3 # E : - bge $2, $do_wh64_w # U : - - nop - nop - nop - beq $3, no_quad_w # U : Might have finished already - -.align 4 - /* - * Simple loop for trailing quadwords, or for small amounts - * of data (where we can't use an unrolled loop and wh64) - */ -loop_w: - stq $17,0($5) # L : - subq $3,1,$3 # E : Decrement number quads left - addq $5,8,$5 # E : Inc address - bne $3,loop_w # U : more? - -no_quad_w: - /* - * Write 0..7 trailing bytes. - */ - nop # E : - beq $18,end_w # U : All done? - ldq $7,0($5) # L : - mskqh $7,$6,$2 # U : Mask final quad - - insqh $17,$6,$4 # U : New bits - bis $2,$4,$1 # E : Put it all together - stq $1,0($5) # L : And back to memory - ret $31,($26),1 # L0 : - -within_quad_w: - ldq_u $1,0($16) # L : - insql $17,$16,$2 # U : New bits - mskql $1,$16,$4 # U : Clear old - bis $2,$4,$2 # E : New result - - mskql $2,$6,$4 # U : - mskqh $1,$6,$2 # U : - bis $2,$4,$1 # E : - stq_u $1,0($16) # L : - -end_w: - nop - nop - nop - ret $31,($26),1 # L0 : - - .end __memset16 - EXPORT_SYMBOL(__memset16) - -memset = ___memset -__memset = ___memset - EXPORT_SYMBOL(memset) - EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/ev6-stxcpy.S b/arch/alpha/lib/ev6-stxcpy.S deleted file mode 100644 index 65f5f7310d802d8f98cf91c61667a82fa367a9ff..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-stxcpy.S +++ /dev/null @@ -1,322 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-stxcpy.S - * 21264 version contributed by Rick Gorton - * - * Copy a null-terminated string from SRC to DST. - * - * This is an internal routine used by strcpy, stpcpy, and strcat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. 
- * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * - * On output: - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * - * Furthermore, v0, a3-a5, t11, and t12 are untouched. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - - .ent stxcpy_aligned - .align 4 -stxcpy_aligned: - .frame sp, 0, t9 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # E : build a mask against false zero - mskqh t2, a1, t2 # U : detection in the src word (stall) - mskqh t1, a1, t3 # U : - ornot t1, t2, t2 # E : (stall) - - mskql t0, a1, t0 # U : assemble the first output word - cmpbge zero, t2, t8 # E : bits set iff null found - or t0, t3, t1 # E : (stall) - bne t8, $a_eos # U : (stall) - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == a source word not containing a null. */ - /* Nops here to separate store quads from load quads */ - -$a_loop: - stq_u t1, 0(a0) # L : - addq a0, 8, a0 # E : - nop - nop - - ldq_u t1, 0(a1) # L : Latency=3 - addq a1, 8, a1 # E : - cmpbge zero, t1, t8 # E : (3 cycle stall) - beq t8, $a_loop # U : (stall for t8) - - /* Take care of the final (partial) word store. - On entry to this basic block we have: - t1 == the source word containing the null - t8 == the cmpbge mask that found it. */ -$a_eos: - negq t8, t6 # E : find low bit set - and t8, t6, t12 # E : (stall) - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # E : (stall) - bne t6, 1f # U : (stall) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t0, 0(a0) # L : Latency=3 - subq t12, 1, t6 # E : - zapnot t1, t6, t1 # U : clear src bytes >= null (stall) - or t12, t6, t8 # E : (stall) - - zap t0, t8, t0 # E : clear dst bytes <= null - or t0, t1, t1 # E : (stall) - nop - nop - -1: stq_u t1, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - .end stxcpy_aligned - - .align 4 - .ent __stxcpy - .globl __stxcpy -__stxcpy: - .frame sp, 0, t9 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t0 # E : - unop # E : - and t0, 7, t0 # E : (stall) - bne t0, $unaligned # U : (stall) - - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # L : load first src word - and a0, 7, t0 # E : take care not to load a word ... - addq a1, 8, a1 # E : - beq t0, stxcpy_aligned # U : ... 
if we wont need it (stall) - - ldq_u t0, 0(a0) # L : - br stxcpy_aligned # L0 : Latency=3 - nop - nop - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 4 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, for masking back in, if needed else 0 - t1 == the low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # L : - addq a1, 8, a1 # E : - extql t1, a1, t1 # U : (stall on a1) - extqh t2, a1, t4 # U : (stall on a1) - - mskql t0, a0, t0 # U : - or t1, t4, t1 # E : - mskqh t1, a0, t1 # U : (stall on t1) - or t0, t1, t1 # E : (stall on t1) - - or t1, t6, t6 # E : - cmpbge zero, t6, t8 # E : (stall) - lda t6, -1 # E : for masking just below - bne t8, $u_final # U : (stall) - - mskql t6, a1, t6 # U : mask out the bits we have - or t6, t2, t2 # E : already extracted before (stall) - cmpbge zero, t2, t8 # E : testing eos (stall) - bne t8, $u_late_head_exit # U : (stall) - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - stq_u t1, 0(a0) # L : store first output word - addq a0, 8, a0 # E : - extql t2, a1, t0 # U : position ho-bits of lo word - ldq_u t2, 8(a1) # U : read next high-order source word - - addq a1, 8, a1 # E : - cmpbge zero, t2, t8 # E : (stall for t2) - nop # E : - bne t8, $u_eos # U : (stall) - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. */ - - .align 3 -$u_loop: - extqh t2, a1, t1 # U : extract high bits for current word - addq a1, 8, a1 # E : (stall) - extql t2, a1, t3 # U : extract low bits for next time (stall) - addq a0, 8, a0 # E : - - or t0, t1, t1 # E : current dst word now complete - ldq_u t2, 0(a1) # L : Latency=3 load high word for next time - stq_u t1, -8(a0) # L : save the current word (stall) - mov t3, t0 # E : - - cmpbge zero, t2, t8 # E : test new word for eos - beq t8, $u_loop # U : (stall) - nop - nop - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - extqh t2, a1, t1 # U : - or t0, t1, t1 # E : first (partial) source word complete (stall) - cmpbge zero, t1, t8 # E : is the null in this first bit? (stall) - bne t8, $u_final # U : (stall) - -$u_late_head_exit: - stq_u t1, 0(a0) # L : the null was in the high-order bits - addq a0, 8, a0 # E : - extql t2, a1, t1 # U : - cmpbge zero, t1, t8 # E : (stall) - - /* Take care of a final (probably partial) result word. 
- On entry to this basic block: - t1 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # E : isolate low bit set - and t6, t8, t12 # E : (stall) - and t12, 0x80, t6 # E : avoid dest word load if we can (stall) - bne t6, 1f # U : (stall) - - ldq_u t0, 0(a0) # E : - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - zapnot t1, t6, t1 # U : kill source bytes >= null (stall) - - zap t0, t8, t0 # U : kill dest bytes <= null (2 cycle data stall) - or t0, t1, t1 # E : (stall) - nop - nop - -1: stq_u t1, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - /* Unaligned copy entry point. */ - .align 4 -$unaligned: - - ldq_u t1, 0(a1) # L : load first source word - and a0, 7, t4 # E : find dest misalignment - and a1, 7, t5 # E : find src misalignment - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - mov zero, t0 # E : - - mov zero, t6 # E : - beq t4, 1f # U : - ldq_u t0, 0(a0) # L : - lda t6, -1 # E : - - mskql t6, a0, t6 # U : - nop - nop - nop -1: - subq a1, t4, a1 # E : sub dest misalignment from src addr - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - cmplt t4, t5, t12 # E : - beq t12, $u_head # U : - lda t2, -1 # E : mask out leading garbage in source - - mskqh t2, t5, t2 # U : - ornot t1, t2, t3 # E : (stall) - cmpbge zero, t3, t8 # E : is there a zero? (stall) - beq t8, $u_head # U : (stall) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # L : - negq t8, t6 # E : build bitmask of bytes <= zero - and t6, t8, t12 # E : (stall) - and a1, 7, t5 # E : - - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - srl t12, t5, t12 # U : adjust final null return value - zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) - - and t1, t2, t1 # E : to source validity mask - extql t2, a1, t2 # U : - extql t1, a1, t1 # U : (stall) - andnot t0, t2, t0 # .. e1 : zero place for source to reside (stall) - - or t0, t1, t1 # e1 : and put it there - stq_u t1, 0(a0) # .. e0 : (stall) - ret (t9) # e1 : - nop - - .end __stxcpy - diff --git a/arch/alpha/lib/ev6-stxncpy.S b/arch/alpha/lib/ev6-stxncpy.S deleted file mode 100644 index 76da205282eec7aa1115568beb1f16f184e4a08d..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev6-stxncpy.S +++ /dev/null @@ -1,398 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev6-stxncpy.S - * 21264 version contributed by Rick Gorton - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. - * - * This is an internal routine used by strncpy, stpncpy, and strncat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * a2 = COUNT - * - * Furthermore, COUNT may not be zero. 
- * - * On output: - * t0 = last word written - * t10 = bitmask (with one bit set) indicating the byte position of - * the end of the range specified by COUNT - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * a2 = the number of full words left in COUNT - * - * Furthermore, v0, a3-a5, t11, and $at are untouched. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - - .ent stxncpy_aligned - .align 4 -stxncpy_aligned: - .frame sp, 0, t9, 0 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # E : build a mask against false zero - mskqh t2, a1, t2 # U : detection in the src word (stall) - mskqh t1, a1, t3 # U : - ornot t1, t2, t2 # E : (stall) - - mskql t0, a1, t0 # U : assemble the first output word - cmpbge zero, t2, t8 # E : bits set iff null found - or t0, t3, t0 # E : (stall) - beq a2, $a_eoc # U : - - bne t8, $a_eos # U : - nop - nop - nop - - /* On entry to this basic block: - t0 == a source word not containing a null. */ - - /* - * nops here to: - * separate store quads from load quads - * limit of 1 bcond/quad to permit training - */ -$a_loop: - stq_u t0, 0(a0) # L : - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - nop - - ldq_u t0, 0(a1) # L : - addq a1, 8, a1 # E : - cmpbge zero, t0, t8 # E : - beq a2, $a_eoc # U : - - beq t8, $a_loop # U : - nop - nop - nop - - /* Take care of the final (partial) word store. At this point - the end-of-count bit is set in t8 iff it applies. - - On entry to this basic block we have: - t0 == the source word containing the null - t8 == the cmpbge mask that found it. */ - -$a_eos: - negq t8, t12 # E : find low bit set - and t8, t12, t12 # E : (stall) - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # E : (stall) - bne t6, 1f # U : (stall) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t1, 0(a0) # L : - subq t12, 1, t6 # E : - or t12, t6, t8 # E : (stall) - zapnot t0, t8, t0 # U : clear src bytes > null (stall) - - zap t1, t8, t1 # .. e1 : clear dst bytes <= null - or t0, t1, t0 # e1 : (stall) - nop - nop - -1: stq_u t0, 0(a0) # L : - ret (t9) # L0 : Latency=3 - nop - nop - - /* Add the end-of-count bit to the eos detection bitmask. */ -$a_eoc: - or t10, t8, t8 # E : - br $a_eos # L0 : Latency=3 - nop - nop - - .end stxncpy_aligned - - .align 4 - .ent __stxncpy - .globl __stxncpy -__stxncpy: - .frame sp, 0, t9, 0 - .prologue 0 - - /* Are source and destination co-aligned? 
*/ - xor a0, a1, t1 # E : - and a0, 7, t0 # E : find dest misalignment - and t1, 7, t1 # E : (stall) - addq a2, t0, a2 # E : bias count by dest misalignment (stall) - - subq a2, 1, a2 # E : - and a2, 7, t2 # E : (stall) - srl a2, 3, a2 # U : a2 = loop counter = (count - 1)/8 (stall) - addq zero, 1, t10 # E : - - sll t10, t2, t10 # U : t10 = bitmask of last count byte - bne t1, $unaligned # U : - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # L : load first src word - addq a1, 8, a1 # E : - - beq t0, stxncpy_aligned # U : avoid loading dest word if not needed - ldq_u t0, 0(a0) # L : - nop - nop - - br stxncpy_aligned # .. e1 : - nop - nop - nop - - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 4 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, unmasked - t1 == the shifted low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # L : Latency=3 load second src word - addq a1, 8, a1 # E : - mskql t0, a0, t0 # U : mask trailing garbage in dst - extqh t2, a1, t4 # U : (3 cycle stall on t2) - - or t1, t4, t1 # E : first aligned src word complete (stall) - mskqh t1, a0, t1 # U : mask leading garbage in src (stall) - or t0, t1, t0 # E : first output word complete (stall) - or t0, t6, t6 # E : mask original data for zero test (stall) - - cmpbge zero, t6, t8 # E : - beq a2, $u_eocfin # U : - lda t6, -1 # E : - nop - - bne t8, $u_final # U : - mskql t6, a1, t6 # U : mask out bits already seen - stq_u t0, 0(a0) # L : store first output word - or t6, t2, t2 # E : (stall) - - cmpbge zero, t2, t8 # E : find nulls in second partial - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - bne t8, $u_late_head_exit # U : - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - extql t2, a1, t1 # U : position hi-bits of lo word - beq a2, $u_eoc # U : - ldq_u t2, 8(a1) # L : read next high-order source word - addq a1, 8, a1 # E : - - extqh t2, a1, t0 # U : position lo-bits of hi word (stall) - cmpbge zero, t2, t8 # E : - nop - bne t8, $u_eos # U : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. 
*/ - - .align 4 -$u_loop: - or t0, t1, t0 # E : current dst word now complete - subq a2, 1, a2 # E : decrement word count - extql t2, a1, t1 # U : extract low bits for next time - addq a0, 8, a0 # E : - - stq_u t0, -8(a0) # U : save the current word - beq a2, $u_eoc # U : - ldq_u t2, 8(a1) # U : Latency=3 load high word for next time - addq a1, 8, a1 # E : - - extqh t2, a1, t0 # U : extract low bits (2 cycle stall) - cmpbge zero, t2, t8 # E : test new word for eos - nop - beq t8, $u_loop # U : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - or t0, t1, t0 # E : first (partial) source word complete - nop - cmpbge zero, t0, t8 # E : is the null in this first bit? (stall) - bne t8, $u_final # U : (stall) - - stq_u t0, 0(a0) # L : the null was in the high-order bits - addq a0, 8, a0 # E : - subq a2, 1, a2 # E : - nop - -$u_late_head_exit: - extql t2, a1, t0 # U : - cmpbge zero, t0, t8 # E : - or t8, t10, t6 # E : (stall) - cmoveq a2, t6, t8 # E : Latency=2, extra map slot (stall) - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t0 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # E : isolate low bit set - and t6, t8, t12 # E : (stall) - and t12, 0x80, t6 # E : avoid dest word load if we can (stall) - bne t6, 1f # U : (stall) - - ldq_u t1, 0(a0) # L : - subq t12, 1, t6 # E : - or t6, t12, t8 # E : (stall) - zapnot t0, t8, t0 # U : kill source bytes > null - - zap t1, t8, t1 # U : kill dest bytes <= null - or t0, t1, t0 # E : (stall) - nop - nop - -1: stq_u t0, 0(a0) # L : - ret (t9) # L0 : Latency=3 - - /* Got to end-of-count before end of string. - On entry to this basic block: - t1 == the shifted high-order bits from the previous source word */ -$u_eoc: - and a1, 7, t6 # E : avoid final load if possible - sll t10, t6, t6 # U : (stall) - and t6, 0xff, t6 # E : (stall) - bne t6, 1f # U : (stall) - - ldq_u t2, 8(a1) # L : load final src word - nop - extqh t2, a1, t0 # U : extract low bits for last word (stall) - or t1, t0, t1 # E : (stall) - -1: cmpbge zero, t1, t8 # E : - mov t1, t0 # E : - -$u_eocfin: # end-of-count, final word - or t10, t8, t8 # E : - br $u_final # L0 : Latency=3 - - /* Unaligned copy entry point. */ - .align 4 -$unaligned: - - ldq_u t1, 0(a1) # L : load first source word - and a0, 7, t4 # E : find dest misalignment - and a1, 7, t5 # E : find src misalignment - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - mov zero, t0 # E : - - mov zero, t6 # E : - beq t4, 1f # U : - ldq_u t0, 0(a0) # L : - lda t6, -1 # E : - - mskql t6, a0, t6 # U : - nop - nop - subq a1, t4, a1 # E : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - -1: cmplt t4, t5, t12 # E : - extql t1, a1, t1 # U : shift src into place - lda t2, -1 # E : for creating masks later - beq t12, $u_head # U : (stall) - - extql t2, a1, t2 # U : - cmpbge zero, t1, t8 # E : is there a zero? 
- andnot t2, t6, t2 # E : dest mask for a single word copy - or t8, t10, t5 # E : test for end-of-count too - - cmpbge zero, t2, t3 # E : - cmoveq a2, t5, t8 # E : Latency=2, extra map slot - nop # E : keep with cmoveq - andnot t8, t3, t8 # E : (stall) - - beq t8, $u_head # U : - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - ldq_u t0, 0(a0) # L : - negq t8, t6 # E : build bitmask of bytes <= zero - mskqh t1, t4, t1 # U : - - and t6, t8, t12 # E : - subq t12, 1, t6 # E : (stall) - or t6, t12, t8 # E : (stall) - zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall) - - zapnot t1, t8, t1 # U : to source validity mask - andnot t0, t2, t0 # E : zero place for source to reside - or t0, t1, t0 # E : and put it there (stall both t0, t1) - stq_u t0, 0(a0) # L : (stall) - - ret (t9) # L0 : Latency=3 - nop - nop - nop - - .end __stxncpy diff --git a/arch/alpha/lib/ev67-strcat.S b/arch/alpha/lib/ev67-strcat.S deleted file mode 100644 index ec3096a9e8d409558f7a29d19868688f7971dc81..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strcat.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strcat.S - * 21264 version contributed by Rick Gorton - * - * Append a null-terminated string from SRC to DST. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - * Commentary: It seems bogus to walk the input string twice - once - * to determine the length, and then again while doing the copy. - * A significant (future) enhancement would be to only read the input - * string once. - */ - -#include - .text - - .align 4 - .globl strcat - .ent strcat -strcat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # E : set up return value - /* Find the end of the string. */ - ldq_u $1, 0($16) # L : load first quadword (a0 may be misaligned) - lda $2, -1 # E : - insqh $2, $16, $2 # U : - - andnot $16, 7, $16 # E : - or $2, $1, $1 # E : - cmpbge $31, $1, $2 # E : bits set iff byte == 0 - bne $2, $found # U : - -$loop: ldq $1, 8($16) # L : - addq $16, 8, $16 # E : - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: cttz $2, $3 # U0 : - addq $16, $3, $16 # E : - /* Now do the append. */ - mov $26, $23 # E : - br __stxcpy # L0 : - - .end strcat - EXPORT_SYMBOL(strcat) diff --git a/arch/alpha/lib/ev67-strchr.S b/arch/alpha/lib/ev67-strchr.S deleted file mode 100644 index fbf89e0b6dc3b7bbb70560f1852728c01f3bf26d..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strchr.S +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strchr.S - * 21264 version contributed by Rick Gorton - * - * Return the address of a given character within a null-terminated - * string, or null if it is not found. 
- * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ -#include -#include - - .set noreorder - .set noat - - .align 4 - .globl strchr - .ent strchr -strchr: - .frame sp, 0, ra - .prologue 0 - - ldq_u t0, 0(a0) # L : load first quadword Latency=3 - and a1, 0xff, t3 # E : 00000000000000ch - insbl a1, 1, t5 # U : 000000000000ch00 - insbl a1, 7, a2 # U : ch00000000000000 - - insbl t3, 6, a3 # U : 00ch000000000000 - or t5, t3, a1 # E : 000000000000chch - andnot a0, 7, v0 # E : align our loop pointer - lda t4, -1 # E : build garbage mask - - mskqh t4, a0, t4 # U : only want relevant part of first quad - or a2, a3, a2 # E : chch000000000000 - inswl a1, 2, t5 # E : 00000000chch0000 - inswl a1, 4, a3 # E : 0000chch00000000 - - or a1, a2, a1 # E : chch00000000chch - or a3, t5, t5 # E : 0000chchchch0000 - cmpbge zero, t0, t2 # E : bits set iff byte == zero - cmpbge zero, t4, t4 # E : bits set iff byte is garbage - - /* This quad is _very_ serialized. Lots of stalling happens */ - or t5, a1, a1 # E : chchchchchchchch - xor t0, a1, t1 # E : make bytes == c zero - cmpbge zero, t1, t3 # E : bits set iff byte == c - or t2, t3, t0 # E : bits set iff char match or zero match - - andnot t0, t4, t0 # E : clear garbage bits - cttz t0, a2 # U0 : speculative (in case we get a match) - nop # E : - bne t0, $found # U : - - /* - * Yuk. This loop is going to stall like crazy waiting for the - * data to be loaded. Not much can be done about it unless it's - * unrolled multiple times - is that safe to do in kernel space? - * Or would exception handling recovery code do the trick here? - */ -$loop: ldq t0, 8(v0) # L : Latency=3 - addq v0, 8, v0 # E : - xor t0, a1, t1 # E : - cmpbge zero, t0, t2 # E : bits set iff byte == 0 - - cmpbge zero, t1, t3 # E : bits set iff byte == c - or t2, t3, t0 # E : - cttz t3, a2 # U0 : speculative (in case we get a match) - beq t0, $loop # U : - -$found: negq t0, t1 # E : clear all but least set bit - and t0, t1, t0 # E : - and t0, t3, t1 # E : bit set iff byte was the char - addq v0, a2, v0 # E : Add in the bit number from above - - cmoveq t1, $31, v0 # E : Two mapping slots, latency = 2 - nop - nop - ret # L0 : - - .end strchr - EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/ev67-strlen.S b/arch/alpha/lib/ev67-strlen.S deleted file mode 100644 index b73106ffbbc7c7e32e286a92ec9f2cc276038bfd..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strlen.S +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strlen.S - * 21264 version by Rick Gorton - * - * Finds length of a 0-terminated string. 
Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ -#include - .set noreorder - .set noat - - .globl strlen - .ent strlen - .align 4 -strlen: - ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) - lda $2, -1($31) # E : - insqh $2, $16, $2 # U : - andnot $16, 7, $0 # E : - - or $2, $1, $1 # E : - cmpbge $31, $1, $2 # E : $2 <- bitmask: bit i == 1 <==> i-th byte == 0 - nop # E : - bne $2, $found # U : - -$loop: ldq $1, 8($0) # L : - addq $0, 8, $0 # E : addr += 8 - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: - cttz $2, $3 # U0 : - addq $0, $3, $0 # E : - subq $0, $16, $0 # E : - ret $31, ($26) # L0 : - - .end strlen - EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/ev67-strncat.S b/arch/alpha/lib/ev67-strncat.S deleted file mode 100644 index ceb0ca528789aa67026ed17e1e2b6db7245de242..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strncat.S +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strncat.S - * 21264 version contributed by Rick Gorton - * - * Append no more than COUNT characters from the null-terminated string SRC - * to the null-terminated string DST. Always null-terminate the new DST. - * - * This differs slightly from the semantics in libc in that we never write - * past count, whereas libc may write to count+1. This follows the generic - * implementation in lib/string.c and is, IMHO, more sensible. - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - * Try not to change the actual algorithm if possible for consistency. - */ - -#include - .text - - .align 4 - .globl strncat - .ent strncat -strncat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - beq $18, $zerocount # U : - /* Find the end of the string. */ - ldq_u $1, 0($16) # L : load first quadword ($16 may be misaligned) - lda $2, -1($31) # E : - - insqh $2, $0, $2 # U : - andnot $16, 7, $16 # E : - nop # E : - or $2, $1, $1 # E : - - nop # E : - nop # E : - cmpbge $31, $1, $2 # E : bits set iff byte == 0 - bne $2, $found # U : - -$loop: ldq $1, 8($16) # L : - addq $16, 8, $16 # E : - cmpbge $31, $1, $2 # E : - beq $2, $loop # U : - -$found: cttz $2, $3 # U0 : - addq $16, $3, $16 # E : - nop # E : - bsr $23, __stxncpy # L0 :/* Now do the append. */ - - /* Worry about the null termination. */ - - zapnot $1, $27, $2 # U : was last byte a null? - cmplt $27, $24, $5 # E : did we fill the buffer completely? - bne $2, 0f # U : - ret # L0 : - -0: or $5, $18, $2 # E : - nop - bne $2, 2f # U : - and $24, 0x80, $3 # E : no zero next byte - - nop # E : - bne $3, 1f # U : - /* Here there are bytes left in the current word. 
Clear one. */ - addq $24, $24, $24 # E : end-of-count bit <<= 1 - nop # E : - -2: zap $1, $24, $1 # U : - nop # E : - stq_u $1, 0($16) # L : - ret # L0 : - -1: /* Here we must clear the first byte of the next DST word */ - stb $31, 8($16) # L : - nop # E : - nop # E : - ret # L0 : - -$zerocount: - nop # E : - nop # E : - nop # E : - ret # L0 : - - .end strncat - EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/ev67-strrchr.S b/arch/alpha/lib/ev67-strrchr.S deleted file mode 100644 index 7f80e398530f564666cfac6f8b1fa8c836b028d4..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/ev67-strrchr.S +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/ev67-strrchr.S - * 21264 version by Rick Gorton - * - * Finds length of a 0-terminated string. Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - * Much of the information about 21264 scheduling/coding comes from: - * Compiler Writer's Guide for the Alpha 21264 - * abbreviated as 'CWG' in other comments here - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html - * Scheduling notation: - * E - either cluster - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 - */ - -#include -#include - - .set noreorder - .set noat - - .align 4 - .ent strrchr - .globl strrchr -strrchr: - .frame sp, 0, ra - .prologue 0 - - and a1, 0xff, t2 # E : 00000000000000ch - insbl a1, 1, t4 # U : 000000000000ch00 - insbl a1, 2, t5 # U : 0000000000ch0000 - ldq_u t0, 0(a0) # L : load first quadword Latency=3 - - mov zero, t6 # E : t6 is last match aligned addr - or t2, t4, a1 # E : 000000000000chch - sll t5, 8, t3 # U : 00000000ch000000 - mov zero, t8 # E : t8 is last match byte compare mask - - andnot a0, 7, v0 # E : align source addr - or t5, t3, t3 # E : 00000000chch0000 - sll a1, 32, t2 # U : 0000chch00000000 - sll a1, 48, t4 # U : chch000000000000 - - or t4, a1, a1 # E : chch00000000chch - or t2, t3, t2 # E : 0000chchchch0000 - or a1, t2, a1 # E : chchchchchchchch - lda t5, -1 # E : build garbage mask - - cmpbge zero, t0, t1 # E : bits set iff byte == zero - mskqh t5, a0, t4 # E : Complete garbage mask - xor t0, a1, t2 # E : make bytes == c zero - cmpbge zero, t4, t4 # E : bits set iff byte is garbage - - cmpbge zero, t2, t3 # E : bits set iff byte == c - andnot t1, t4, t1 # E : clear garbage from null test - andnot t3, t4, t3 # E : clear garbage from char test - bne t1, $eos # U : did we already hit the terminator? - - /* Character search main loop */ -$loop: - ldq t0, 8(v0) # L : load next quadword - cmovne t3, v0, t6 # E : save previous comparisons match - nop # : Latency=2, extra map slot (keep nop with cmov) - nop - - cmovne t3, t3, t8 # E : Latency=2, extra map slot - nop # : keep with cmovne - addq v0, 8, v0 # E : - xor t0, a1, t2 # E : - - cmpbge zero, t0, t1 # E : bits set iff byte == zero - cmpbge zero, t2, t3 # E : bits set iff byte == c - beq t1, $loop # U : if we havnt seen a null, loop - nop - - /* Mask out character matches after terminator */ -$eos: - negq t1, t4 # E : isolate first null byte match - and t1, t4, t4 # E : - subq t4, 1, t5 # E : build a mask of the bytes up to... - or t4, t5, t4 # E : ... 
and including the null - - and t3, t4, t3 # E : mask out char matches after null - cmovne t3, t3, t8 # E : save it, if match found Latency=2, extra map slot - nop # : Keep with cmovne - nop - - cmovne t3, v0, t6 # E : - nop # : Keep with cmovne - /* Locate the address of the last matched character */ - ctlz t8, t2 # U0 : Latency=3 (0x40 for t8=0) - nop - - cmoveq t8, 0x3f, t2 # E : Compensate for case when no match is seen - nop # E : hide the cmov latency (2) behind ctlz latency - lda t5, 0x3f($31) # E : - subq t5, t2, t5 # E : Normalize leading zero count - - addq t6, t5, v0 # E : and add to quadword address - ret # L0 : Latency=3 - nop - nop - - .end strrchr - EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/memchr.S b/arch/alpha/lib/memchr.S deleted file mode 100644 index c13d3eca2e0592736dd17112ff836dcac5c0b7cb..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memchr.S +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright (C) 1996 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David Mosberger (davidm@cs.arizona.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* Finds characters in a memory area. Optimized for the Alpha: - - - memory accessed as aligned quadwords only - - uses cmpbge to compare 8 bytes in parallel - - does binary search to find 0 byte in last - quadword (HAKMEM needed 12 instructions to - do this instead of the 9 instructions that - binary search needs). - -For correctness consider that: - - - only minimum number of quadwords may be accessed - - the third argument is an unsigned long -*/ -#include - .set noreorder - .set noat - - .globl memchr - .ent memchr -memchr: - .frame $30,0,$26,0 - .prologue 0 - - # Hack -- if someone passes in (size_t)-1, hoping to just - # search til the end of the address space, we will overflow - # below when we find the address of the last byte. Given - # that we will never have a 56-bit address space, cropping - # the length is the easiest way to avoid trouble. - zap $18, 0x80, $5 #-e0 : - - beq $18, $not_found # .. e1 : - ldq_u $1, 0($16) # e1 : load first quadword - insbl $17, 1, $2 # .. e0 : $2 = 000000000000ch00 - and $17, 0xff, $17 #-e0 : $17 = 00000000000000ch - cmpult $18, 9, $4 # .. e1 : - or $2, $17, $17 # e0 : $17 = 000000000000chch - lda $3, -1($31) # .. e1 : - sll $17, 16, $2 #-e0 : $2 = 00000000chch0000 - addq $16, $5, $5 # .. e1 : - or $2, $17, $17 # e1 : $17 = 00000000chchchch - unop # : - sll $17, 32, $2 #-e0 : $2 = chchchch00000000 - or $2, $17, $17 # e1 : $17 = chchchchchchchch - extql $1, $16, $7 # e0 : - beq $4, $first_quad # .. e1 : - - ldq_u $6, -1($5) #-e1 : eight or less bytes to search - extqh $6, $16, $6 # .. e0 : - mov $16, $0 # e0 : - or $7, $6, $1 # .. 
e1 : $1 = quadword starting at $16 - - # Deal with the case where at most 8 bytes remain to be searched - # in $1. E.g.: - # $18 = 6 - # $1 = ????c6c5c4c3c2c1 -$last_quad: - negq $18, $6 #-e0 : - xor $17, $1, $1 # .. e1 : - srl $3, $6, $6 # e0 : $6 = mask of $18 bits set - cmpbge $31, $1, $2 # .. e1 : - and $2, $6, $2 #-e0 : - beq $2, $not_found # .. e1 : - -$found_it: - # Now, determine which byte matched: - negq $2, $3 # e0 : - and $2, $3, $2 # e1 : - - and $2, 0x0f, $1 #-e0 : - addq $0, 4, $3 # .. e1 : - cmoveq $1, $3, $0 # e0 : - - addq $0, 2, $3 # .. e1 : - and $2, 0x33, $1 #-e0 : - cmoveq $1, $3, $0 # .. e1 : - - and $2, 0x55, $1 # e0 : - addq $0, 1, $3 # .. e1 : - cmoveq $1, $3, $0 #-e0 : - -$done: ret # .. e1 : - - # Deal with the case where $18 > 8 bytes remain to be - # searched. $16 may not be aligned. - .align 4 -$first_quad: - andnot $16, 0x7, $0 #-e1 : - insqh $3, $16, $2 # .. e0 : $2 = 0000ffffffffffff ($16<0:2> ff) - xor $1, $17, $1 # e0 : - or $1, $2, $1 # e1 : $1 = ====ffffffffffff - cmpbge $31, $1, $2 #-e0 : - bne $2, $found_it # .. e1 : - - # At least one byte left to process. - - ldq $1, 8($0) # e0 : - subq $5, 1, $18 # .. e1 : - addq $0, 8, $0 #-e0 : - - # Make $18 point to last quad to be accessed (the - # last quad may or may not be partial). - - andnot $18, 0x7, $18 # .. e1 : - cmpult $0, $18, $2 # e0 : - beq $2, $final # .. e1 : - - # At least two quads remain to be accessed. - - subq $18, $0, $4 #-e0 : $4 <- nr quads to be processed - and $4, 8, $4 # e1 : odd number of quads? - bne $4, $odd_quad_count # e1 : - - # At least three quads remain to be accessed - - mov $1, $4 # e0 : move prefetched value to correct reg - - .align 4 -$unrolled_loop: - ldq $1, 8($0) #-e0 : prefetch $1 - xor $17, $4, $2 # .. e1 : - cmpbge $31, $2, $2 # e0 : - bne $2, $found_it # .. e1 : - - addq $0, 8, $0 #-e0 : -$odd_quad_count: - xor $17, $1, $2 # .. e1 : - ldq $4, 8($0) # e0 : prefetch $4 - cmpbge $31, $2, $2 # .. e1 : - addq $0, 8, $6 #-e0 : - bne $2, $found_it # .. e1 : - - cmpult $6, $18, $6 # e0 : - addq $0, 8, $0 # .. e1 : - bne $6, $unrolled_loop #-e1 : - - mov $4, $1 # e0 : move prefetched value into $1 -$final: subq $5, $0, $18 # .. e1 : $18 <- number of bytes left to do - bne $18, $last_quad # e1 : - -$not_found: - mov $31, $0 #-e0 : - ret # .. e1 : - - .end memchr - EXPORT_SYMBOL(memchr) diff --git a/arch/alpha/lib/memmove.S b/arch/alpha/lib/memmove.S deleted file mode 100644 index 42d1922d0edfca212abebd5bbc674257723ba0ac..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memmove.S +++ /dev/null @@ -1,183 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/memmove.S - * - * Barely optimized memmove routine for Alpha EV5. - * - * This is hand-massaged output from the original memcpy.c. We defer to - * memcpy whenever possible; the backwards copy loops are not unrolled. - */ -#include - .set noat - .set noreorder - .text - - .align 4 - .globl memmove - .ent memmove -memmove: - ldgp $29, 0($27) - unop - nop - .prologue 1 - - addq $16,$18,$4 - addq $17,$18,$5 - cmpule $4,$17,$1 /* dest + n <= src */ - cmpule $5,$16,$2 /* dest >= src + n */ - - bis $1,$2,$1 - mov $16,$0 - xor $16,$17,$2 - bne $1,memcpy !samegp - - and $2,7,$2 /* Test for src/dest co-alignment. 
*/ - and $16,7,$1 - cmpule $16,$17,$3 - bne $3,$memmove_up /* dest < src */ - - and $4,7,$1 - bne $2,$misaligned_dn - unop - beq $1,$skip_aligned_byte_loop_head_dn - -$aligned_byte_loop_head_dn: - lda $4,-1($4) - lda $5,-1($5) - unop - ble $18,$egress - - ldq_u $3,0($5) - ldq_u $2,0($4) - lda $18,-1($18) - extbl $3,$5,$1 - - insbl $1,$4,$1 - mskbl $2,$4,$2 - bis $1,$2,$1 - and $4,7,$6 - - stq_u $1,0($4) - bne $6,$aligned_byte_loop_head_dn - -$skip_aligned_byte_loop_head_dn: - lda $18,-8($18) - blt $18,$skip_aligned_word_loop_dn - -$aligned_word_loop_dn: - ldq $1,-8($5) - nop - lda $5,-8($5) - lda $18,-8($18) - - stq $1,-8($4) - nop - lda $4,-8($4) - bge $18,$aligned_word_loop_dn - -$skip_aligned_word_loop_dn: - lda $18,8($18) - bgt $18,$byte_loop_tail_dn - unop - ret $31,($26),1 - - .align 4 -$misaligned_dn: - nop - fnop - unop - beq $18,$egress - -$byte_loop_tail_dn: - ldq_u $3,-1($5) - ldq_u $2,-1($4) - lda $5,-1($5) - lda $4,-1($4) - - lda $18,-1($18) - extbl $3,$5,$1 - insbl $1,$4,$1 - mskbl $2,$4,$2 - - bis $1,$2,$1 - stq_u $1,0($4) - bgt $18,$byte_loop_tail_dn - br $egress - -$memmove_up: - mov $16,$4 - mov $17,$5 - bne $2,$misaligned_up - beq $1,$skip_aligned_byte_loop_head_up - -$aligned_byte_loop_head_up: - unop - ble $18,$egress - ldq_u $3,0($5) - ldq_u $2,0($4) - - lda $18,-1($18) - extbl $3,$5,$1 - insbl $1,$4,$1 - mskbl $2,$4,$2 - - bis $1,$2,$1 - lda $5,1($5) - stq_u $1,0($4) - lda $4,1($4) - - and $4,7,$6 - bne $6,$aligned_byte_loop_head_up - -$skip_aligned_byte_loop_head_up: - lda $18,-8($18) - blt $18,$skip_aligned_word_loop_up - -$aligned_word_loop_up: - ldq $1,0($5) - nop - lda $5,8($5) - lda $18,-8($18) - - stq $1,0($4) - nop - lda $4,8($4) - bge $18,$aligned_word_loop_up - -$skip_aligned_word_loop_up: - lda $18,8($18) - bgt $18,$byte_loop_tail_up - unop - ret $31,($26),1 - - .align 4 -$misaligned_up: - nop - fnop - unop - beq $18,$egress - -$byte_loop_tail_up: - ldq_u $3,0($5) - ldq_u $2,0($4) - lda $18,-1($18) - extbl $3,$5,$1 - - insbl $1,$4,$1 - mskbl $2,$4,$2 - bis $1,$2,$1 - stq_u $1,0($4) - - lda $5,1($5) - lda $4,1($4) - nop - bgt $18,$byte_loop_tail_up - -$egress: - ret $31,($26),1 - nop - nop - nop - - .end memmove - EXPORT_SYMBOL(memmove) diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S deleted file mode 100644 index 00393e30df259150a5bafe4d0e370987c3fce1e9..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/memset.S +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/alpha/lib/memset.S - * - * This is an efficient (and small) implementation of the C library "memset()" - * function for the alpha. - * - * (C) Copyright 1996 Linus Torvalds - * - * This routine is "moral-ware": you are free to use it any way you wish, and - * the only obligation I put on you is a moral one: if you make any improvements - * to the routine, please send me your improvements for me to use similarly. - * - * The scheduling comments are according to the EV5 documentation (and done by - * hand, so they might well be incorrect, please do tell me about it..) - */ -#include - .set noat - .set noreorder -.text - .globl memset - .globl __memset - .globl ___memset - .globl __memset16 - .globl __constant_c_memset - - .ent ___memset -.align 5 -___memset: - .frame $30,0,$26,0 - .prologue 0 - - and $17,255,$1 /* E1 */ - insbl $17,1,$17 /* .. 
E0 */ - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - sll $17,16,$1 /* E1 (p-c latency, next cycle) */ - - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - sll $17,32,$1 /* E1 (p-c latency, next cycle) */ - bis $17,$1,$17 /* E0 (p-c latency, next cycle) */ - ldq_u $31,0($30) /* .. E1 */ - -.align 5 -__constant_c_memset: - addq $18,$16,$6 /* E0 */ - bis $16,$16,$0 /* .. E1 */ - xor $16,$6,$1 /* E0 */ - ble $18,end /* .. E1 */ - - bic $1,7,$1 /* E0 */ - beq $1,within_one_quad /* .. E1 (note EV5 zero-latency forwarding) */ - and $16,7,$3 /* E0 */ - beq $3,aligned /* .. E1 (note EV5 zero-latency forwarding) */ - - ldq_u $4,0($16) /* E0 */ - bis $16,$16,$5 /* .. E1 */ - insql $17,$16,$2 /* E0 */ - subq $3,8,$3 /* .. E1 */ - - addq $18,$3,$18 /* E0 $18 is new count ($3 is negative) */ - mskql $4,$16,$4 /* .. E1 (and possible load stall) */ - subq $16,$3,$16 /* E0 $16 is new aligned destination */ - bis $2,$4,$1 /* .. E1 */ - - bis $31,$31,$31 /* E0 */ - ldq_u $31,0($30) /* .. E1 */ - stq_u $1,0($5) /* E0 */ - bis $31,$31,$31 /* .. E1 */ - -.align 4 -aligned: - sra $18,3,$3 /* E0 */ - and $18,7,$18 /* .. E1 */ - bis $16,$16,$5 /* E0 */ - beq $3,no_quad /* .. E1 */ - -.align 3 -loop: - stq $17,0($5) /* E0 */ - subq $3,1,$3 /* .. E1 */ - addq $5,8,$5 /* E0 */ - bne $3,loop /* .. E1 */ - -no_quad: - bis $31,$31,$31 /* E0 */ - beq $18,end /* .. E1 */ - ldq $7,0($5) /* E0 */ - mskqh $7,$6,$2 /* .. E1 (and load stall) */ - - insqh $17,$6,$4 /* E0 */ - bis $2,$4,$1 /* .. E1 */ - stq $1,0($5) /* E0 */ - ret $31,($26),1 /* .. E1 */ - -.align 3 -within_one_quad: - ldq_u $1,0($16) /* E0 */ - insql $17,$16,$2 /* E1 */ - mskql $1,$16,$4 /* E0 (after load stall) */ - bis $2,$4,$2 /* E0 */ - - mskql $2,$6,$4 /* E0 */ - mskqh $1,$6,$2 /* .. E1 */ - bis $2,$4,$1 /* E0 */ - stq_u $1,0($16) /* E0 */ - -end: - ret $31,($26),1 /* E1 */ - .end ___memset -EXPORT_SYMBOL(___memset) -EXPORT_SYMBOL(__constant_c_memset) - - .align 5 - .ent __memset16 -__memset16: - .prologue 0 - - inswl $17,0,$1 /* E0 */ - inswl $17,2,$2 /* E0 */ - inswl $17,4,$3 /* E0 */ - or $1,$2,$1 /* .. E1 */ - inswl $17,6,$4 /* E0 */ - or $1,$3,$1 /* .. E1 */ - or $1,$4,$17 /* E0 */ - br __constant_c_memset /* .. E1 */ - - .end __memset16 -EXPORT_SYMBOL(__memset16) - -memset = ___memset -__memset = ___memset - EXPORT_SYMBOL(memset) - EXPORT_SYMBOL(__memset) diff --git a/arch/alpha/lib/strcat.S b/arch/alpha/lib/strcat.S deleted file mode 100644 index 055877dccd276869895b8a48ad6d6c2590c019e2..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strcat.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strcat.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Append a null-terminated string from SRC to DST. - */ -#include - - .text - - .align 3 - .globl strcat - .ent strcat -strcat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - - /* Find the end of the string. */ - - ldq_u $1, 0($16) # load first quadword (a0 may be misaligned) - lda $2, -1 - insqh $2, $16, $2 - andnot $16, 7, $16 - or $2, $1, $1 - cmpbge $31, $1, $2 # bits set iff byte == 0 - bne $2, $found - -$loop: ldq $1, 8($16) - addq $16, 8, $16 - cmpbge $31, $1, $2 - beq $2, $loop - -$found: negq $2, $3 # clear all but least set bit - and $2, $3, $2 - - and $2, 0xf0, $3 # binary search for that set bit - and $2, 0xcc, $4 - and $2, 0xaa, $5 - cmovne $3, 4, $3 - cmovne $4, 2, $4 - cmovne $5, 1, $5 - addq $3, $4, $3 - addq $16, $5, $16 - addq $16, $3, $16 - - /* Now do the append. 
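strcat above splits into two steps: scan to the terminator a quadword at a time, then branch into the shared copy core. A C sketch of that structure, with stx_copy as a hypothetical stand-in for what __stxcpy does (the real routine also reports the position of the last byte written through registers).

```c
#include <stddef.h>

/* Hypothetical stand-in for __stxcpy: copy through the terminator and
 * return where the terminator landed. */
static char *stx_copy(char *dst, const char *src)
{
    while ((*dst = *src++))
        dst++;
    return dst;
}

char *my_strcat(char *dst, const char *src)
{
    char *end = dst;
    while (*end)         /* the assembly scans 8 bytes per cmpbge instead */
        end++;
    stx_copy(end, src);  /* delegate the actual append */
    return dst;          /* strcat returns the original DST */
}
```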
*/ - - mov $26, $23 - br __stxcpy - - .end strcat -EXPORT_SYMBOL(strcat); diff --git a/arch/alpha/lib/strchr.S b/arch/alpha/lib/strchr.S deleted file mode 100644 index 17871dd00280489f80b381a2610ca9b547058c9c..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strchr.S +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strchr.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Return the address of a given character within a null-terminated - * string, or null if it is not found. - */ -#include -#include - - .set noreorder - .set noat - - .align 3 - .globl strchr - .ent strchr -strchr: - .frame sp, 0, ra - .prologue 0 - - zapnot a1, 1, a1 # e0 : zero extend the search character - ldq_u t0, 0(a0) # .. e1 : load first quadword - sll a1, 8, t5 # e0 : replicate the search character - andnot a0, 7, v0 # .. e1 : align our loop pointer - or t5, a1, a1 # e0 : - lda t4, -1 # .. e1 : build garbage mask - sll a1, 16, t5 # e0 : - cmpbge zero, t0, t2 # .. e1 : bits set iff byte == zero - mskqh t4, a0, t4 # e0 : - or t5, a1, a1 # .. e1 : - sll a1, 32, t5 # e0 : - cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage - or t5, a1, a1 # e0 : - xor t0, a1, t1 # .. e1 : make bytes == c zero - cmpbge zero, t1, t3 # e0 : bits set iff byte == c - or t2, t3, t0 # e1 : bits set iff char match or zero match - andnot t0, t4, t0 # e0 : clear garbage bits - bne t0, $found # .. e1 (zdb) - -$loop: ldq t0, 8(v0) # e0 : - addq v0, 8, v0 # .. e1 : - nop # e0 : - xor t0, a1, t1 # .. e1 (ev5 data stall) - cmpbge zero, t0, t2 # e0 : bits set iff byte == 0 - cmpbge zero, t1, t3 # .. e1 : bits set iff byte == c - or t2, t3, t0 # e0 : - beq t0, $loop # .. e1 (zdb) - -$found: negq t0, t1 # e0 : clear all but least set bit - and t0, t1, t0 # e1 (stall) - - and t0, t3, t1 # e0 : bit set iff byte was the char - beq t1, $retnull # .. e1 (zdb) - - and t0, 0xf0, t2 # e0 : binary search for that set bit - and t0, 0xcc, t3 # .. e1 : - and t0, 0xaa, t4 # e0 : - cmovne t2, 4, t2 # .. e1 : - cmovne t3, 2, t3 # e0 : - cmovne t4, 1, t4 # .. e1 : - addq t2, t3, t2 # e0 : - addq v0, t4, v0 # .. e1 : - addq v0, t2, v0 # e0 : - ret # .. e1 : - -$retnull: - mov zero, v0 # e0 : - ret # .. e1 : - - .end strchr - EXPORT_SYMBOL(strchr) diff --git a/arch/alpha/lib/strcpy.S b/arch/alpha/lib/strcpy.S deleted file mode 100644 index cb74ad23a90df8a79a38ea157ac434bc7a7ac567..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strcpy.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strcpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy a null-terminated string from SRC to DST. Return a pointer - * to the null-terminator in the source. - */ -#include - .text - - .align 3 - .globl strcpy - .ent strcpy -strcpy: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - mov $26, $23 # set up return address - unop - br __stxcpy # do the copy - - .end strcpy - EXPORT_SYMBOL(strcpy) diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S deleted file mode 100644 index dd882fe4d7e3076469dbdba9d6c3a6135583c75a..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strlen.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) - * - * Finds length of a 0-terminated string. 
Optimized for the - * Alpha architecture: - * - * - memory accessed as aligned quadwords only - * - uses bcmpge to compare 8 bytes in parallel - * - does binary search to find 0 byte in last - * quadword (HAKMEM needed 12 instructions to - * do this instead of the 9 instructions that - * binary search needs). - */ -#include - .set noreorder - .set noat - - .align 3 - - .globl strlen - .ent strlen - -strlen: - ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) - lda $2, -1($31) - insqh $2, $16, $2 - andnot $16, 7, $0 - or $2, $1, $1 - cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 - bne $2, found - -loop: ldq $1, 8($0) - addq $0, 8, $0 # addr += 8 - nop # helps dual issue last two insns - cmpbge $31, $1, $2 - beq $2, loop - -found: blbs $2, done # make aligned case fast - negq $2, $3 - and $2, $3, $2 - - and $2, 0x0f, $1 - addq $0, 4, $3 - cmoveq $1, $3, $0 - - and $2, 0x33, $1 - addq $0, 2, $3 - cmoveq $1, $3, $0 - - and $2, 0x55, $1 - addq $0, 1, $3 - cmoveq $1, $3, $0 - -done: subq $0, $16, $0 - ret $31, ($26) - - .end strlen - EXPORT_SYMBOL(strlen) diff --git a/arch/alpha/lib/strncat.S b/arch/alpha/lib/strncat.S deleted file mode 100644 index 522fee3e26ac148f8d0175f1964d7f586aa2fb64..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strncat.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strncat.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Append no more than COUNT characters from the null-terminated string SRC - * to the null-terminated string DST. Always null-terminate the new DST. - * - * This differs slightly from the semantics in libc in that we never write - * past count, whereas libc may write to count+1. This follows the generic - * implementation in lib/string.c and is, IMHO, more sensible. - */ -#include - .text - - .align 3 - .globl strncat - .ent strncat -strncat: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set up return value - beq $18, $zerocount - - /* Find the end of the string. */ - - ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) - lda $2, -1($31) - insqh $2, $16, $2 - andnot $16, 7, $16 - or $2, $1, $1 - cmpbge $31, $1, $2 # bits set iff byte == 0 - bne $2, $found - -$loop: ldq $1, 8($16) - addq $16, 8, $16 - cmpbge $31, $1, $2 - beq $2, $loop - -$found: negq $2, $3 # clear all but least set bit - and $2, $3, $2 - - and $2, 0xf0, $3 # binary search for that set bit - and $2, 0xcc, $4 - and $2, 0xaa, $5 - cmovne $3, 4, $3 - cmovne $4, 2, $4 - cmovne $5, 1, $5 - addq $3, $4, $3 - addq $16, $5, $16 - addq $16, $3, $16 - - /* Now do the append. */ - - bsr $23, __stxncpy - - /* Worry about the null termination. */ - - zapnot $1, $27, $2 # was last byte a null? - bne $2, 0f - ret - -0: cmplt $27, $24, $2 # did we fill the buffer completely? - or $2, $18, $2 - bne $2, 2f - - and $24, 0x80, $2 # no zero next byte - bne $2, 1f - - /* Here there are bytes left in the current word. Clear one. */ - addq $24, $24, $24 # end-of-count bit <<= 1 -2: zap $1, $24, $1 - stq_u $1, 0($16) - ret - -1: /* Here we must read the next DST word and clear the first byte. 
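The strlen scan above, like the identical end-of-string search in strcat/strncat, reads one aligned quadword per iteration and lets cmpbge flag any zero byte. C has no cmpbge, but the classic haszero bit trick supports a portable sketch of the same word-at-a-time idea; since that trick is only trustworthy as a yes/no answer, the exact terminator is confirmed bytewise at the end. HASZERO and my_strlen are illustrative names.

```c
#include <stdint.h>
#include <string.h>

/* Nonzero iff some byte of x is zero. */
#define HASZERO(x) (((x) - 0x0101010101010101ULL) & ~(x) & 0x8080808080808080ULL)

size_t my_strlen(const char *s)
{
    const char *p = s;

    /* Reach 8-byte alignment bytewise; the assembly instead loads the first
     * aligned quadword and forces the leading garbage bytes to 0xff. */
    while ((uintptr_t)p & 7) {
        if (!*p)
            return (size_t)(p - s);
        p++;
    }
    for (;;) {
        uint64_t w;
        memcpy(&w, p, sizeof w);   /* one aligned 8-byte load per step */
        if (HASZERO(w))
            break;                 /* some byte in this word is zero */
        p += 8;
    }
    while (*p)                     /* pin down the exact terminator */
        p++;
    return (size_t)(p - s);
}
```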
*/ - ldq_u $1, 8($16) - zap $1, 1, $1 - stq_u $1, 8($16) - -$zerocount: - ret - - .end strncat - EXPORT_SYMBOL(strncat) diff --git a/arch/alpha/lib/strncpy.S b/arch/alpha/lib/strncpy.S deleted file mode 100644 index cc57fad8b7ca7728c28824dbc506357a562b3b8a..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strncpy.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strncpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. If SRC does not cover all of COUNT, the balance is - * zeroed. - * - * Or, rather, if the kernel cared about that weird ANSI quirk. This - * version has cropped that bit o' nastiness as well as assuming that - * __stxncpy is in range of a branch. - */ -#include - .set noat - .set noreorder - - .text - - .align 4 - .globl strncpy - .ent strncpy -strncpy: - .frame $30, 0, $26 - .prologue 0 - - mov $16, $0 # set return value now - beq $18, $zerolen - unop - bsr $23, __stxncpy # do the work of the copy - - unop - bne $18, $multiword # do we have full words left? - subq $24, 1, $3 # nope - subq $27, 1, $4 - - or $3, $24, $3 # clear the bits between the last - or $4, $27, $4 # written byte and the last byte in COUNT - andnot $3, $4, $4 - zap $1, $4, $1 - - stq_u $1, 0($16) - ret - - .align 4 -$multiword: - subq $27, 1, $2 # clear the final bits in the prev word - or $2, $27, $2 - zapnot $1, $2, $1 - subq $18, 1, $18 - - stq_u $1, 0($16) - addq $16, 8, $16 - unop - beq $18, 1f - - nop - unop - nop - blbc $18, 0f - - stq_u $31, 0($16) # zero one word - subq $18, 1, $18 - addq $16, 8, $16 - beq $18, 1f - -0: stq_u $31, 0($16) # zero two words - subq $18, 2, $18 - stq_u $31, 8($16) - addq $16, 16, $16 - bne $18, 0b - -1: ldq_u $1, 0($16) # clear the leading bits in the final word - subq $24, 1, $2 - or $2, $24, $2 - - zap $1, $2, $1 - stq_u $1, 0($16) -$zerolen: - ret - - .end strncpy - EXPORT_SYMBOL(strncpy) diff --git a/arch/alpha/lib/strrchr.S b/arch/alpha/lib/strrchr.S deleted file mode 100644 index 7650ba99b7e2c877cb26d97351fa09bfe50b0e5e..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/strrchr.S +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/strrchr.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Return the address of the last occurrence of a given character - * within a null-terminated string, or null if it is not found. - */ -#include -#include - - .set noreorder - .set noat - - .align 3 - .ent strrchr - .globl strrchr -strrchr: - .frame sp, 0, ra - .prologue 0 - - zapnot a1, 1, a1 # e0 : zero extend our test character - mov zero, t6 # .. e1 : t6 is last match aligned addr - sll a1, 8, t5 # e0 : replicate our test character - mov zero, t8 # .. e1 : t8 is last match byte compare mask - or t5, a1, a1 # e0 : - ldq_u t0, 0(a0) # .. e1 : load first quadword - sll a1, 16, t5 # e0 : - andnot a0, 7, v0 # .. e1 : align source addr - or t5, a1, a1 # e0 : - lda t4, -1 # .. e1 : build garbage mask - sll a1, 32, t5 # e0 : - cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero - mskqh t4, a0, t4 # e0 : - or t5, a1, a1 # .. e1 : character replication complete - xor t0, a1, t2 # e0 : make bytes == c zero - cmpbge zero, t4, t4 # .. e1 : bits set iff byte is garbage - cmpbge zero, t2, t3 # e0 : bits set iff byte == c - andnot t1, t4, t1 # .. e1 : clear garbage from null test - andnot t3, t4, t3 # e0 : clear garbage from char test - bne t1, $eos # .. 
e1 : did we already hit the terminator? - - /* Character search main loop */ -$loop: - ldq t0, 8(v0) # e0 : load next quadword - cmovne t3, v0, t6 # .. e1 : save previous comparisons match - cmovne t3, t3, t8 # e0 : - addq v0, 8, v0 # .. e1 : - xor t0, a1, t2 # e0 : - cmpbge zero, t0, t1 # .. e1 : bits set iff byte == zero - cmpbge zero, t2, t3 # e0 : bits set iff byte == c - beq t1, $loop # .. e1 : if we havnt seen a null, loop - - /* Mask out character matches after terminator */ -$eos: - negq t1, t4 # e0 : isolate first null byte match - and t1, t4, t4 # e1 : - subq t4, 1, t5 # e0 : build a mask of the bytes up to... - or t4, t5, t4 # e1 : ... and including the null - - and t3, t4, t3 # e0 : mask out char matches after null - cmovne t3, t3, t8 # .. e1 : save it, if match found - cmovne t3, v0, t6 # e0 : - - /* Locate the address of the last matched character */ - - /* Retain the early exit for the ev4 -- the ev5 mispredict penalty - is 5 cycles -- the same as just falling through. */ - beq t8, $retnull # .. e1 : - - and t8, 0xf0, t2 # e0 : binary search for the high bit set - cmovne t2, t2, t8 # .. e1 (zdb) - cmovne t2, 4, t2 # e0 : - and t8, 0xcc, t1 # .. e1 : - cmovne t1, t1, t8 # e0 : - cmovne t1, 2, t1 # .. e1 : - and t8, 0xaa, t0 # e0 : - cmovne t0, 1, t0 # .. e1 (zdb) - addq t2, t1, t1 # e0 : - addq t6, t0, v0 # .. e1 : add our aligned base ptr to the mix - addq v0, t1, v0 # e0 : - ret # .. e1 : - -$retnull: - mov zero, v0 # e0 : - ret # .. e1 : - - .end strrchr - EXPORT_SYMBOL(strrchr) diff --git a/arch/alpha/lib/stxcpy.S b/arch/alpha/lib/stxcpy.S deleted file mode 100644 index 58723b0a36d4a12c7c28e8be821876152f2b8584..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/stxcpy.S +++ /dev/null @@ -1,290 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/stxcpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy a null-terminated string from SRC to DST. - * - * This is an internal routine used by strcpy, stpcpy, and strcat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * - * On output: - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * - * Furthermore, v0, a3-a5, t11, and t12 are untouched. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - .ent stxcpy_aligned - .align 3 -stxcpy_aligned: - .frame sp, 0, t9 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # e1 : build a mask against false zero - mskqh t2, a1, t2 # e0 : detection in the src word - mskqh t1, a1, t3 # e0 : - ornot t1, t2, t2 # .. e1 : - mskql t0, a1, t0 # e0 : assemble the first output word - cmpbge zero, t2, t8 # .. e1 : bits set iff null found - or t0, t3, t1 # e0 : - bne t8, $a_eos # .. e1 : - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == a source word not containing a null. */ - -$a_loop: - stq_u t1, 0(a0) # e0 : - addq a0, 8, a0 # .. 
e1 : - ldq_u t1, 0(a1) # e0 : - addq a1, 8, a1 # .. e1 : - cmpbge zero, t1, t8 # e0 (stall) - beq t8, $a_loop # .. e1 (zdb) - - /* Take care of the final (partial) word store. - On entry to this basic block we have: - t1 == the source word containing the null - t8 == the cmpbge mask that found it. */ -$a_eos: - negq t8, t6 # e0 : find low bit set - and t8, t6, t12 # e1 (stall) - - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # e0 : - bne t6, 1f # .. e1 (zdb) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t0, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - zapnot t1, t6, t1 # e0 : clear src bytes >= null - or t12, t6, t8 # .. e1 : - zap t0, t8, t0 # e0 : clear dst bytes <= null - or t0, t1, t1 # e1 : - -1: stq_u t1, 0(a0) # e0 : - ret (t9) # .. e1 : - - .end stxcpy_aligned - - .align 3 - .ent __stxcpy - .globl __stxcpy -__stxcpy: - .frame sp, 0, t9 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t0 # e0 : - unop # : - and t0, 7, t0 # e0 : - bne t0, $unaligned # .. e1 : - - /* We are co-aligned; take care of a partial first word. */ - ldq_u t1, 0(a1) # e0 : load first src word - and a0, 7, t0 # .. e1 : take care not to load a word ... - addq a1, 8, a1 # e0 : - beq t0, stxcpy_aligned # .. e1 : ... if we wont need it - ldq_u t0, 0(a0) # e0 : - br stxcpy_aligned # .. e1 : - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 3 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, for masking back in, if needed else 0 - t1 == the low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # e0 : - addq a1, 8, a1 # .. e1 : - - extql t1, a1, t1 # e0 : - extqh t2, a1, t4 # e0 : - mskql t0, a0, t0 # e0 : - or t1, t4, t1 # .. e1 : - mskqh t1, a0, t1 # e0 : - or t0, t1, t1 # e1 : - - or t1, t6, t6 # e0 : - cmpbge zero, t6, t8 # .. e1 : - lda t6, -1 # e0 : for masking just below - bne t8, $u_final # .. e1 : - - mskql t6, a1, t6 # e0 : mask out the bits we have - or t6, t2, t2 # e1 : already extracted before - cmpbge zero, t2, t8 # e0 : testing eos - bne t8, $u_late_head_exit # .. e1 (zdb) - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - stq_u t1, 0(a0) # e0 : store first output word - addq a0, 8, a0 # .. e1 : - extql t2, a1, t0 # e0 : position ho-bits of lo word - ldq_u t2, 8(a1) # .. e1 : read next high-order source word - addq a1, 8, a1 # e0 : - cmpbge zero, t2, t8 # .. e1 : - nop # e0 : - bne t8, $u_eos # .. e1 : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. 
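The $u_loop that follows assembles every aligned destination word from two adjacent aligned source words, extqh supplying the high part and extql the carry for the next round. A C sketch of that funnel shift with the null detection stripped out; it assumes little-endian order as on Alpha, a source misalignment of 1..7 bytes, and that one source word past the copied range is readable.

```c
#include <stddef.h>
#include <stdint.h>

void copy_shifted(uint64_t *dst, const uint64_t *src_aligned,
                  unsigned shift,    /* source misalignment, 1..7 bytes */
                  size_t n_words)
{
    unsigned lo = 8 * shift;         /* bits taken from the current word */
    unsigned hi = 64 - lo;           /* bits carried in from the next    */
    uint64_t prev = src_aligned[0];

    for (size_t i = 0; i < n_words; i++) {
        uint64_t next = src_aligned[i + 1];   /* reads one word past the end */
        dst[i] = (prev >> lo) | (next << hi); /* assemble a full dst word    */
        prev = next;
    }
}
```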
*/ - - .align 3 -$u_loop: - extqh t2, a1, t1 # e0 : extract high bits for current word - addq a1, 8, a1 # .. e1 : - extql t2, a1, t3 # e0 : extract low bits for next time - addq a0, 8, a0 # .. e1 : - or t0, t1, t1 # e0 : current dst word now complete - ldq_u t2, 0(a1) # .. e1 : load high word for next time - stq_u t1, -8(a0) # e0 : save the current word - mov t3, t0 # .. e1 : - cmpbge zero, t2, t8 # e0 : test new word for eos - beq t8, $u_loop # .. e1 : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - extqh t2, a1, t1 # e0 : - or t0, t1, t1 # e1 : first (partial) source word complete - - cmpbge zero, t1, t8 # e0 : is the null in this first bit? - bne t8, $u_final # .. e1 (zdb) - -$u_late_head_exit: - stq_u t1, 0(a0) # e0 : the null was in the high-order bits - addq a0, 8, a0 # .. e1 : - extql t2, a1, t1 # e0 : - cmpbge zero, t1, t8 # .. e1 : - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t1 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # e0 : isolate low bit set - and t6, t8, t12 # e1 : - - and t12, 0x80, t6 # e0 : avoid dest word load if we can - bne t6, 1f # .. e1 (zdb) - - ldq_u t0, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t6, t12, t8 # e0 : - zapnot t1, t6, t1 # .. e1 : kill source bytes >= null - zap t0, t8, t0 # e0 : kill dest bytes <= null - or t0, t1, t1 # e1 : - -1: stq_u t1, 0(a0) # e0 : - ret (t9) # .. e1 : - - /* Unaligned copy entry point. */ - .align 3 -$unaligned: - - ldq_u t1, 0(a1) # e0 : load first source word - - and a0, 7, t4 # .. e1 : find dest misalignment - and a1, 7, t5 # e0 : find src misalignment - - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - - mov zero, t0 # .. e1 : - mov zero, t6 # e0 : - beq t4, 1f # .. e1 : - ldq_u t0, 0(a0) # e0 : - lda t6, -1 # .. e1 : - mskql t6, a0, t6 # e0 : -1: - subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - - cmplt t4, t5, t12 # e0 : - beq t12, $u_head # .. e1 (zdb) - - lda t2, -1 # e1 : mask out leading garbage in source - mskqh t2, t5, t2 # e0 : - nop # e0 : - ornot t1, t2, t3 # .. e1 : - cmpbge zero, t3, t8 # e0 : is there a zero? - beq t8, $u_head # .. e1 (zdb) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. (Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # e0 : - - negq t8, t6 # .. e1 : build bitmask of bytes <= zero - and t6, t8, t12 # e0 : - and a1, 7, t5 # .. e1 : - subq t12, 1, t6 # e0 : - or t6, t12, t8 # e1 : - srl t12, t5, t12 # e0 : adjust final null return value - - zapnot t2, t8, t2 # .. e1 : prepare source word; mirror changes - and t1, t2, t1 # e1 : to source validity mask - extql t2, a1, t2 # .. e0 : - extql t1, a1, t1 # e0 : - - andnot t0, t2, t0 # .. e1 : zero place for source to reside - or t0, t1, t1 # e1 : and put it there - stq_u t1, 0(a0) # .. 
e0 : - ret (t9) # e1 : - - .end __stxcpy diff --git a/arch/alpha/lib/stxncpy.S b/arch/alpha/lib/stxncpy.S deleted file mode 100644 index 011d9091c6e16deb49f8d7b76c47e384bf599b23..0000000000000000000000000000000000000000 --- a/arch/alpha/lib/stxncpy.S +++ /dev/null @@ -1,346 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/alpha/lib/stxncpy.S - * Contributed by Richard Henderson (rth@tamu.edu) - * - * Copy no more than COUNT bytes of the null-terminated string from - * SRC to DST. - * - * This is an internal routine used by strncpy, stpncpy, and strncat. - * As such, it uses special linkage conventions to make implementation - * of these public functions more efficient. - * - * On input: - * t9 = return address - * a0 = DST - * a1 = SRC - * a2 = COUNT - * - * Furthermore, COUNT may not be zero. - * - * On output: - * t0 = last word written - * t10 = bitmask (with one bit set) indicating the byte position of - * the end of the range specified by COUNT - * t12 = bitmask (with one bit set) indicating the last byte written - * a0 = unaligned address of the last *word* written - * a2 = the number of full words left in COUNT - * - * Furthermore, v0, a3-a5, t11, and $at are untouched. - */ - -#include - - .set noat - .set noreorder - - .text - -/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that - doesn't like putting the entry point for a procedure somewhere in the - middle of the procedure descriptor. Work around this by putting the - aligned copy in its own procedure descriptor */ - - .ent stxncpy_aligned - .align 3 -stxncpy_aligned: - .frame sp, 0, t9, 0 - .prologue 0 - - /* On entry to this basic block: - t0 == the first destination word for masking back in - t1 == the first source word. */ - - /* Create the 1st output word and detect 0's in the 1st input word. */ - lda t2, -1 # e1 : build a mask against false zero - mskqh t2, a1, t2 # e0 : detection in the src word - mskqh t1, a1, t3 # e0 : - ornot t1, t2, t2 # .. e1 : - mskql t0, a1, t0 # e0 : assemble the first output word - cmpbge zero, t2, t8 # .. e1 : bits set iff null found - or t0, t3, t0 # e0 : - beq a2, $a_eoc # .. e1 : - bne t8, $a_eos # .. e1 : - - /* On entry to this basic block: - t0 == a source word not containing a null. */ - -$a_loop: - stq_u t0, 0(a0) # e0 : - addq a0, 8, a0 # .. e1 : - ldq_u t0, 0(a1) # e0 : - addq a1, 8, a1 # .. e1 : - subq a2, 1, a2 # e0 : - cmpbge zero, t0, t8 # .. e1 (stall) - beq a2, $a_eoc # e1 : - beq t8, $a_loop # e1 : - - /* Take care of the final (partial) word store. At this point - the end-of-count bit is set in t8 iff it applies. - - On entry to this basic block we have: - t0 == the source word containing the null - t8 == the cmpbge mask that found it. */ - -$a_eos: - negq t8, t12 # e0 : find low bit set - and t8, t12, t12 # e1 (stall) - - /* For the sake of the cache, don't read a destination word - if we're not going to need it. */ - and t12, 0x80, t6 # e0 : - bne t6, 1f # .. e1 (zdb) - - /* We're doing a partial word store and so need to combine - our source and original destination words. */ - ldq_u t1, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t12, t6, t8 # e0 : - unop # - zapnot t0, t8, t0 # e0 : clear src bytes > null - zap t1, t8, t1 # .. e1 : clear dst bytes <= null - or t0, t1, t0 # e1 : - -1: stq_u t0, 0(a0) # e0 : - ret (t9) # e1 : - - /* Add the end-of-count bit to the eos detection bitmask. 
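$a_eoc below works because the end-of-count position was encoded up front as a single-bit byte mask (t10) that can simply be OR-ed into the cmpbge result, making "count expired" look like "terminator found". A sketch of that setup arithmetic, ignoring the bias by destination misalignment that the real entry code applies to COUNT first.

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t count = 13;  /* example COUNT */

    uint64_t full_loops = (count - 1) >> 3;                   /* srl a2, 3, a2 */
    uint8_t  eoc_mask   = (uint8_t)(1u << ((count - 1) & 7)); /* sll t10, t2   */

    /* 13 bytes = one full quadword, then count expires at byte 4 of the
     * next word; OR-ing eoc_mask into a cmpbge result makes that byte
     * behave like a terminator. */
    assert(full_loops == 1);
    assert(eoc_mask == 0x10);
    return 0;
}
```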
*/ -$a_eoc: - or t10, t8, t8 - br $a_eos - - .end stxncpy_aligned - - .align 3 - .ent __stxncpy - .globl __stxncpy -__stxncpy: - .frame sp, 0, t9, 0 - .prologue 0 - - /* Are source and destination co-aligned? */ - xor a0, a1, t1 # e0 : - and a0, 7, t0 # .. e1 : find dest misalignment - and t1, 7, t1 # e0 : - addq a2, t0, a2 # .. e1 : bias count by dest misalignment - subq a2, 1, a2 # e0 : - and a2, 7, t2 # e1 : - srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 - addq zero, 1, t10 # .. e1 : - sll t10, t2, t10 # e0 : t10 = bitmask of last count byte - bne t1, $unaligned # .. e1 : - - /* We are co-aligned; take care of a partial first word. */ - - ldq_u t1, 0(a1) # e0 : load first src word - addq a1, 8, a1 # .. e1 : - - beq t0, stxncpy_aligned # avoid loading dest word if not needed - ldq_u t0, 0(a0) # e0 : - br stxncpy_aligned # .. e1 : - - -/* The source and destination are not co-aligned. Align the destination - and cope. We have to be very careful about not reading too much and - causing a SEGV. */ - - .align 3 -$u_head: - /* We know just enough now to be able to assemble the first - full source word. We can still find a zero at the end of it - that prevents us from outputting the whole thing. - - On entry to this basic block: - t0 == the first dest word, unmasked - t1 == the shifted low bits of the first source word - t6 == bytemask that is -1 in dest word bytes */ - - ldq_u t2, 8(a1) # e0 : load second src word - addq a1, 8, a1 # .. e1 : - mskql t0, a0, t0 # e0 : mask trailing garbage in dst - extqh t2, a1, t4 # e0 : - or t1, t4, t1 # e1 : first aligned src word complete - mskqh t1, a0, t1 # e0 : mask leading garbage in src - or t0, t1, t0 # e0 : first output word complete - or t0, t6, t6 # e1 : mask original data for zero test - cmpbge zero, t6, t8 # e0 : - beq a2, $u_eocfin # .. e1 : - lda t6, -1 # e0 : - bne t8, $u_final # .. e1 : - - mskql t6, a1, t6 # e0 : mask out bits already seen - nop # .. e1 : - stq_u t0, 0(a0) # e0 : store first output word - or t6, t2, t2 # .. e1 : - cmpbge zero, t2, t8 # e0 : find nulls in second partial - addq a0, 8, a0 # .. e1 : - subq a2, 1, a2 # e0 : - bne t8, $u_late_head_exit # .. e1 : - - /* Finally, we've got all the stupid leading edge cases taken care - of and we can set up to enter the main loop. */ - - extql t2, a1, t1 # e0 : position hi-bits of lo word - beq a2, $u_eoc # .. e1 : - ldq_u t2, 8(a1) # e0 : read next high-order source word - addq a1, 8, a1 # .. e1 : - extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall) - cmpbge zero, t2, t8 # .. e1 : - nop # e0 : - bne t8, $u_eos # .. e1 : - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned source words. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word - - We further know that t2 does not contain a null terminator. */ - - .align 3 -$u_loop: - or t0, t1, t0 # e0 : current dst word now complete - subq a2, 1, a2 # .. e1 : decrement word count - stq_u t0, 0(a0) # e0 : save the current word - addq a0, 8, a0 # .. e1 : - extql t2, a1, t1 # e0 : extract high bits for next time - beq a2, $u_eoc # .. 
e1 : - ldq_u t2, 8(a1) # e0 : load high word for next time - addq a1, 8, a1 # .. e1 : - nop # e0 : - cmpbge zero, t2, t8 # e1 : test new word for eos (stall) - extqh t2, a1, t0 # e0 : extract low bits for current word - beq t8, $u_loop # .. e1 : - - /* We've found a zero somewhere in the source word we just read. - If it resides in the lower half, we have one (probably partial) - word to write out, and if it resides in the upper half, we - have one full and one partial word left to write out. - - On entry to this basic block: - t0 == the shifted low-order bits from the current source word - t1 == the shifted high-order bits from the previous source word - t2 == the unshifted current source word. */ -$u_eos: - or t0, t1, t0 # e0 : first (partial) source word complete - nop # .. e1 : - cmpbge zero, t0, t8 # e0 : is the null in this first bit? - bne t8, $u_final # .. e1 (zdb) - - stq_u t0, 0(a0) # e0 : the null was in the high-order bits - addq a0, 8, a0 # .. e1 : - subq a2, 1, a2 # e1 : - -$u_late_head_exit: - extql t2, a1, t0 # .. e0 : - cmpbge zero, t0, t8 # e0 : - or t8, t10, t6 # e1 : - cmoveq a2, t6, t8 # e0 : - nop # .. e1 : - - /* Take care of a final (probably partial) result word. - On entry to this basic block: - t0 == assembled source word - t8 == cmpbge mask that found the null. */ -$u_final: - negq t8, t6 # e0 : isolate low bit set - and t6, t8, t12 # e1 : - - and t12, 0x80, t6 # e0 : avoid dest word load if we can - bne t6, 1f # .. e1 (zdb) - - ldq_u t1, 0(a0) # e0 : - subq t12, 1, t6 # .. e1 : - or t6, t12, t8 # e0 : - zapnot t0, t8, t0 # .. e1 : kill source bytes > null - zap t1, t8, t1 # e0 : kill dest bytes <= null - or t0, t1, t0 # e1 : - -1: stq_u t0, 0(a0) # e0 : - ret (t9) # .. e1 : - - /* Got to end-of-count before end of string. - On entry to this basic block: - t1 == the shifted high-order bits from the previous source word */ -$u_eoc: - and a1, 7, t6 # e1 : - sll t10, t6, t6 # e0 : - and t6, 0xff, t6 # e0 : - bne t6, 1f # .. e1 : - - ldq_u t2, 8(a1) # e0 : load final src word - nop # .. e1 : - extqh t2, a1, t0 # e0 : extract low bits for last word - or t1, t0, t1 # e1 : - -1: cmpbge zero, t1, t8 - mov t1, t0 - -$u_eocfin: # end-of-count, final word - or t10, t8, t8 - br $u_final - - /* Unaligned copy entry point. */ - .align 3 -$unaligned: - - ldq_u t1, 0(a1) # e0 : load first source word - - and a0, 7, t4 # .. e1 : find dest misalignment - and a1, 7, t5 # e0 : find src misalignment - - /* Conditionally load the first destination word and a bytemask - with 0xff indicating that the destination byte is sacrosanct. */ - - mov zero, t0 # .. e1 : - mov zero, t6 # e0 : - beq t4, 1f # .. e1 : - ldq_u t0, 0(a0) # e0 : - lda t6, -1 # .. e1 : - mskql t6, a0, t6 # e0 : - subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr - - /* If source misalignment is larger than dest misalignment, we need - extra startup checks to avoid SEGV. */ - -1: cmplt t4, t5, t12 # e1 : - extql t1, a1, t1 # .. e0 : shift src into place - lda t2, -1 # e0 : for creating masks later - beq t12, $u_head # .. e1 : - - extql t2, a1, t2 # e0 : - cmpbge zero, t1, t8 # .. e1 : is there a zero? - andnot t2, t6, t2 # e0 : dest mask for a single word copy - or t8, t10, t5 # .. e1 : test for end-of-count too - cmpbge zero, t2, t3 # e0 : - cmoveq a2, t5, t8 # .. e1 : - andnot t8, t3, t8 # e0 : - beq t8, $u_head # .. e1 (zdb) - - /* At this point we've found a zero in the first partial word of - the source. We need to isolate the valid source data and mask - it into the original destination data. 
(Incidentally, we know - that we'll need at least one byte of that original dest word.) */ - - ldq_u t0, 0(a0) # e0 : - negq t8, t6 # .. e1 : build bitmask of bytes <= zero - mskqh t1, t4, t1 # e0 : - and t6, t8, t12 # .. e1 : - subq t12, 1, t6 # e0 : - or t6, t12, t8 # e1 : - - zapnot t2, t8, t2 # e0 : prepare source word; mirror changes - zapnot t1, t8, t1 # .. e1 : to source validity mask - - andnot t0, t2, t0 # e0 : zero place for source to reside - or t0, t1, t0 # e1 : and put it there - stq_u t0, 0(a0) # e0 : - ret (t9) # .. e1 : - - .end __stxncpy diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S deleted file mode 100644 index d6373ec1bff9e99a9f65368d713a2f42ecd0bf15..0000000000000000000000000000000000000000 --- a/arch/alpha/math-emu/qrnnd.S +++ /dev/null @@ -1,163 +0,0 @@ - # Alpha 21064 __udiv_qrnnd - # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc. - - # This file is part of GCC. - - # The GNU MP Library is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by - # the Free Software Foundation; either version 2 of the License, or (at your - # option) any later version. - - # In addition to the permissions in the GNU General Public License, the - # Free Software Foundation gives you unlimited permission to link the - # compiled version of this file with other programs, and to distribute - # those programs without any restriction coming from the use of this - # file. (The General Public License restrictions do apply in other - # respects; for example, they cover modification of the file, and - # distribution when not linked into another program.) - - # This file is distributed in the hope that it will be useful, but - # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public - # License for more details. - - # You should have received a copy of the GNU General Public License - # along with GCC; see the file COPYING. If not, write to the - # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - # MA 02111-1307, USA. 
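__udiv_qrnnd below is a restoring shift-and-subtract divider: the 128-bit numerator n1:n0 shifts left one bit per step, d is subtracted whenever it fits, and the vacated low bits of n0 accumulate the quotient; the assembly unrolls four steps per iteration, hence the loop count of 16. A C sketch under the usual precondition n1 < d, restricted to d below 2^63 so the $largedivisor path can be ignored.

```c
#include <stdint.h>

uint64_t udiv_qrnnd(uint64_t *rem, uint64_t n1, uint64_t n0, uint64_t d)
{
    for (int i = 0; i < 64; i++) {
        uint64_t carry = n0 >> 63;   /* cmplt n0,0,tmp: the bit shifted out */
        n1 = (n1 << 1) | carry;      /* 128-bit left shift, high half       */
        n0 <<= 1;                    /* ... and low half                    */
        if (n1 >= d) {               /* cmpule d,n1,qb                      */
            n1 -= d;                 /* restoring subtract                  */
            n0 |= 1;                 /* record this quotient bit            */
        }
    }
    *rem = n1;                       /* stq n1,0(rem_ptr) */
    return n0;                       /* quotient in $0    */
}
```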
- - .set noreorder - .set noat - - .text - - .globl __udiv_qrnnd - .ent __udiv_qrnnd -__udiv_qrnnd: - .frame $30,0,$26,0 - .prologue 0 - -#define cnt $2 -#define tmp $3 -#define rem_ptr $16 -#define n1 $17 -#define n0 $18 -#define d $19 -#define qb $20 -#define AT $at - - ldiq cnt,16 - blt d,$largedivisor - -$loop1: cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule d,n1,qb - subq n1,d,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - subq cnt,1,cnt - bgt cnt,$loop1 - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - -$largedivisor: - and n0,1,$4 - - srl n0,1,n0 - sll n1,63,tmp - or tmp,n0,n0 - srl n1,1,n1 - - and d,1,$6 - srl d,1,$5 - addq $5,$6,$5 - -$loop2: cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - cmplt n0,0,tmp - addq n1,n1,n1 - bis n1,tmp,n1 - addq n0,n0,n0 - cmpule $5,n1,qb - subq n1,$5,tmp - cmovne qb,tmp,n1 - bis n0,qb,n0 - subq cnt,1,cnt - bgt cnt,$loop2 - - addq n1,n1,n1 - addq $4,n1,n1 - bne $6,$Odd - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - -$Odd: - /* q' in n0. r' in n1 */ - addq n1,n0,n1 - - cmpult n1,n0,tmp # tmp := carry from addq - subq n1,d,AT - addq n0,tmp,n0 - cmovne tmp,AT,n1 - - cmpult n1,d,tmp - addq n0,1,AT - cmoveq tmp,AT,n0 - subq n1,d,AT - cmoveq tmp,AT,n1 - - stq n1,0(rem_ptr) - bis $31,n0,$0 - ret $31,($26),1 - - .end __udiv_qrnnd diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S deleted file mode 100644 index 02c4614847611e5e55bbe88f00d102cd73ef0f84..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/ctx_sw_asm.S +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * Vineetg: Aug 2009 - * -Moved core context switch macro out of entry.S into this file. - * -This is the more "natural" hand written assembler - */ - -#include -#include /* For the SAVE_* macros */ -#include - -#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) - -;################### Low Level Context Switch ########################## - - .section .sched.text,"ax",@progbits - .align 4 - .global __switch_to - .type __switch_to, @function -__switch_to: - CFI_STARTPROC - - /* Save regs on kernel mode stack of task */ - st.a blink, [sp, -4] - st.a fp, [sp, -4] - SAVE_CALLEE_SAVED_KERNEL - - /* Save the now KSP in task->thread.ksp */ -#if KSP_WORD_OFF <= 255 - st.as sp, [r0, KSP_WORD_OFF] -#else - /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */ - add2 r24, r0, KSP_WORD_OFF - st sp, [r24] -#endif - /* - * Return last task in r0 (return reg) - * On ARC, Return reg = First Arg reg = r0. 
- * Since we already have last task in r0, - * don't need to do anything special to return it - */ - - /* - * switch to new task, contained in r1 - * Temp reg r3 is required to get the ptr to store val - */ - SET_CURR_TASK_ON_CPU r1, r3 - - /* reload SP with kernel mode stack pointer in task->thread.ksp */ - ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4] - - /* restore the registers */ - RESTORE_CALLEE_SAVED_KERNEL - ld.ab fp, [sp, 4] - ld.ab blink, [sp, 4] - j [blink] - -END_CFI(__switch_to) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S deleted file mode 100644 index 12d5f12d10d23b96cbb2ace74a2629aa1ac7257a..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry-arcv2.S +++ /dev/null @@ -1,263 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ARCv2 ISA based core Low Level Intr/Traps/Exceptions(non-TLB) Handling - * - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) - */ - -#include /* ARC_{EXTRY,EXIT} */ -#include /* SAVE_ALL_{INT1,INT2,TRAP...} */ -#include -#include -#include - -; A maximum number of supported interrupts in the core interrupt controller. -; This number is not equal to the maximum interrupt number (256) because -; first 16 lines are reserved for exceptions and are not configurable. -#define NR_CPU_IRQS 240 - - .cpu HS - -#define VECTOR .word - -;############################ Vector Table ################################# - - .section .vector,"a",@progbits - .align 4 - -# Initial 16 slots are Exception Vectors -VECTOR res_service ; Reset Vector -VECTOR mem_service ; Mem exception -VECTOR instr_service ; Instrn Error -VECTOR EV_MachineCheck ; Fatal Machine check -VECTOR EV_TLBMissI ; Intruction TLB miss -VECTOR EV_TLBMissD ; Data TLB miss -VECTOR EV_TLBProtV ; Protection Violation -VECTOR EV_PrivilegeV ; Privilege Violation -VECTOR EV_SWI ; Software Breakpoint -VECTOR EV_Trap ; Trap exception -VECTOR EV_Extension ; Extn Instruction Exception -VECTOR EV_DivZero ; Divide by Zero -VECTOR EV_DCError ; Data Cache Error -VECTOR EV_Misaligned ; Misaligned Data Access -VECTOR reserved ; Reserved slots -VECTOR reserved ; Reserved slots - -# Begin Interrupt Vectors -VECTOR handle_interrupt ; (16) Timer0 -VECTOR handle_interrupt ; unused (Timer1) -VECTOR handle_interrupt ; unused (WDT) -VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI) -VECTOR handle_interrupt ; (20) perf Interrupt -VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI) -VECTOR handle_interrupt ; unused -VECTOR handle_interrupt ; (23) unused -# End of fixed IRQs - -.rept NR_CPU_IRQS - 8 - VECTOR handle_interrupt -.endr - - .section .text, "ax",@progbits - -reserved: - flag 1 ; Unexpected event, halt - -;##################### Interrupt Handling ############################## - -ENTRY(handle_interrupt) - - INTERRUPT_PROLOGUE - - # irq control APIs local_irq_save/restore/disable/enable fiddle with - # global interrupt enable bits in STATUS32 (.IE for 1 prio, .E[] for 2 prio) - # However a taken interrupt doesn't clear these bits. Thus irqs_disabled() - # query in hard ISR path would return false (since .IE is set) which would - # trips genirq interrupt handling asserts. - # - # So do a "soft" disable of interrutps here. - # - # Note this disable is only for consistent book-keeping as further interrupts - # will be disabled anyways even w/o this. 
Hardware tracks active interrupts - # seperately in AUX_IRQ_ACT.active and will not take new interrupts - # unless this one returns (or higher prio becomes pending in 2-prio scheme) - - IRQ_DISABLE - - ; icause is banked: one per priority level - ; so a higher prio interrupt taken here won't clobber prev prio icause - lr r0, [ICAUSE] - mov blink, ret_from_exception - - b.d arch_do_IRQ - mov r1, sp - -END(handle_interrupt) - -;################### Non TLB Exception Handling ############################# - -ENTRY(EV_SWI) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_SWI) - -ENTRY(EV_DivZero) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_DivZero) - -ENTRY(EV_DCError) - ; TODO: implement this - EXCEPTION_PROLOGUE - b ret_from_exception -END(EV_DCError) - -; --------------------------------------------- -; Memory Error Exception Handler -; - Unlike ARCompact, handles Bus errors for both User/Kernel mode, -; Instruction fetch or Data access, under a single Exception Vector -; --------------------------------------------- - -ENTRY(mem_service) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_memory_error - b ret_from_exception -END(mem_service) - -ENTRY(EV_Misaligned) - - EXCEPTION_PROLOGUE - - lr r0, [efa] ; Faulting Data address - mov r1, sp - - FAKE_RET_FROM_EXCPN - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception -END(EV_Misaligned) - -; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - lr r0, [efa] ; Faulting Data address - mov r1, sp ; pt_regs - - FAKE_RET_FROM_EXCPN - - mov blink, ret_from_exception - b do_page_fault - -END(EV_TLBProtV) - -; From Linux standpoint Slow Path I/D TLB Miss is same a ProtV as they -; need to call do_page_fault(). -; ECR in pt_regs provides whether access was R/W/X - -.global call_do_page_fault -.set call_do_page_fault, EV_TLBProtV - -;############# Common Handlers for ARCompact and ARCv2 ############## - -#include "entry.S" - -;############# Return from Intr/Excp/Trap (ARCv2 ISA Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -.Lrestore_regs: -restore_regs: - - # Interrpts are actually disabled from this point on, but will get - # reenabled after we return from interrupt/exception. - # But irq tracer needs to be told now... 
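The ARCv2 dispatch just below keys off AUX_IRQ_ACT: its low 16 bits hold one active flag per interrupt priority, so a zero field means this is an exception or pure-kernel return rather than an interrupt return. A sketch of that decision; the function and enum names are illustrative.

```c
#include <stdint.h>

enum return_path { RET_FROM_EXCEPTION, RET_FROM_INTERRUPT };

static enum return_path pick_return_path(uint32_t aux_irq_act)
{
    uint32_t active = aux_irq_act & 0xffffu;  /* bmsk r11, r10, 15 */

    return active ? RET_FROM_INTERRUPT        /* INTERRUPT_EPILOGUE, rtie */
                  : RET_FROM_EXCEPTION;       /* EXCEPTION_EPILOGUE, rtie */
}
```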
- TRACE_ASM_IRQ_ENABLE - - ld r0, [sp, PT_status32] ; U/K mode at time of entry - lr r10, [AUX_IRQ_ACT] - - bmsk r11, r10, 15 ; extract AUX_IRQ_ACT.active - breq r11, 0, .Lexcept_ret ; No intr active, ret from Exception - -;####### Return from Intr ####### - -.Lisr_ret: - -debug_marker_l1: - ; bbit1.nt r0, STATUS_DE_BIT, .Lintr_ret_to_delay_slot - btst r0, STATUS_DE_BIT ; Z flag set if bit clear - bnz .Lintr_ret_to_delay_slot ; branch if STATUS_DE_BIT set - - ; Handle special case #1: (Entry via Exception, Return via IRQ) - ; - ; Exception in U mode, preempted in kernel, Intr taken (K mode), orig - ; task now returning to U mode (riding the Intr) - ; AUX_IRQ_ACTIVE won't have U bit set (since intr in K mode), hence SP - ; won't be switched to correct U mode value (from AUX_SP) - ; So force AUX_IRQ_ACT.U for such a case - - btst r0, STATUS_U_BIT ; Z flag set if K (Z clear for U) - bset.nz r11, r11, AUX_IRQ_ACT_BIT_U ; NZ means U - sr r11, [AUX_IRQ_ACT] - - INTERRUPT_EPILOGUE - rtie - -;####### Return from Exception / pure kernel mode ####### - -.Lexcept_ret: ; Expects r0 has PT_status32 - -debug_marker_syscall: - EXCEPTION_EPILOGUE - rtie - -;####### Return from Intr to insn in delay slot ####### - -; Handle special case #2: (Entry via Exception in Delay Slot, Return via IRQ) -; -; Intr returning to a Delay Slot (DS) insn -; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig -; entry was via Exception in DS which got preempted in kernel). -; -; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround -; -; Solution is to drop out of interrupt context into pure kernel mode -; and return from pure kernel mode which does right things for delay slot - -.Lintr_ret_to_delay_slot: -debug_marker_ds: - - ld r2, [@intr_to_DE_cnt] - add r2, r2, 1 - st r2, [@intr_to_DE_cnt] - - ; drop out of interrupt context (clear AUX_IRQ_ACT.active) - bmskn r11, r10, 15 - sr r11, [AUX_IRQ_ACT] - b .Lexcept_ret - -END(ret_from_exception) diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S deleted file mode 100644 index 5cb0cd7e4eabaff15d8e4bc3afa356042615b8fe..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry-compact.S +++ /dev/null @@ -1,403 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low Level Interrupts/Traps/Exceptions(non-TLB) Handling for ARCompact ISA - * - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * vineetg: May 2011 - * -Userspace unaligned access emulation - * - * vineetg: Feb 2011 (ptrace low level code fixes) - * -traced syscall return code (r0) was not saved into pt_regs for restoring - * into user reg-file when traded task rets to user space. - * -syscalls needing arch-wrappers (mainly for passing sp as pt_regs) - * were not invoking post-syscall trace hook (jumping directly into - * ret_from_system_call) - * - * vineetg: Nov 2010: - * -Vector table jumps (@8 bytes) converted into branches (@4 bytes) - * -To maintain the slot size of 8 bytes/vector, added nop, which is - * not executed at runtime. - * - * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) - * -do_signal()invoked upon TIF_RESTORE_SIGMASK as well - * -Wrappers for sys_{,rt_}sigsuspend() no longer needed as they don't - * need ptregs anymore - * - * Vineetg: Oct 2009 - * -In a rare scenario, Process gets a Priv-V exception and gets scheduled - * out. 
Since we don't do FAKE RTIE for Priv-V, CPU exception state remains - * active (AE bit enabled). This causes a double fault for a subseq valid - * exception. Thus FAKE RTIE needed in low level Priv-Violation handler. - * Instr Error could also cause similar scenario, so same there as well. - * - * Vineetg: March 2009 (Supporting 2 levels of Interrupts) - * - * Vineetg: Aug 28th 2008: Bug #94984 - * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap - * Normally CPU does this automatically, however when doing FAKE rtie, - * we need to explicitly do this. The problem in macros - * FAKE_RET_FROM_EXCPN and FAKE_RET_FROM_EXCPN_LOCK_IRQ was that this bit - * was being "CLEARED" rather then "SET". Since it is Loop INHIBIT Bit, - * setting it and not clearing it clears ZOL context - * - * Vineetg: May 16th, 2008 - * - r25 now contains the Current Task when in kernel - * - * Vineetg: Dec 22, 2007 - * Minor Surgery of Low Level ISR to make it SMP safe - * - MMU_SCRATCH0 Reg used for freeing up r9 in Level 1 ISR - * - _current_task is made an array of NR_CPUS - * - Access of _current_task wrapped inside a macro so that if hardware - * team agrees for a dedicated reg, no other code is touched - * - * Amit Bhor, Rahul Trivedi, Kanika Nema, Sameer Dhavale : Codito Tech 2004 - */ - -#include -#include /* {ENTRY,EXIT} */ -#include -#include - - .cpu A7 - -;############################ Vector Table ################################# - -.macro VECTOR lbl -#if 1 /* Just in case, build breaks */ - j \lbl -#else - b \lbl - nop -#endif -.endm - - .section .vector, "ax",@progbits - .align 4 - -/* Each entry in the vector table must occupy 2 words. Since it is a jump - * across sections (.vector to .text) we are guaranteed that 'j somewhere' - * will use the 'j limm' form of the instruction as long as somewhere is in - * a section other than .vector. 
- */ - -; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Reset Vector (0x0) -VECTOR mem_service ; 0x8, Mem exception (0x1) -VECTOR instr_service ; 0x10, Instrn Error (0x2) - -; ******************** Device ISRs ********************** -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -VECTOR handle_interrupt_level2 -#else -VECTOR handle_interrupt_level1 -#endif - -.rept 28 -VECTOR handle_interrupt_level1 ; Other devices -.endr - -/* FOR ARC600: timer = 0x3, uart = 0x8, emac = 0x10 */ - -; ******************** Exceptions ********************** -VECTOR EV_MachineCheck ; 0x100, Fatal Machine check (0x20) -VECTOR EV_TLBMissI ; 0x108, Instruction TLB miss (0x21) -VECTOR EV_TLBMissD ; 0x110, Data TLB miss (0x22) -VECTOR EV_TLBProtV ; 0x118, Protection Violation (0x23) - ; or Misaligned Access -VECTOR EV_PrivilegeV ; 0x120, Privilege Violation (0x24) -VECTOR EV_Trap ; 0x128, Trap exception (0x25) -VECTOR EV_Extension ; 0x130, Extn Instruction Excp (0x26) - -.rept 24 -VECTOR reserved ; Reserved Exceptions -.endr - - -;##################### Scratch Mem for IRQ stack switching ############# - -ARCFP_DATA int1_saved_reg - .align 32 - .type int1_saved_reg, @object - .size int1_saved_reg, 4 -int1_saved_reg: - .zero 4 - -/* Each Interrupt level needs its own scratch */ -ARCFP_DATA int2_saved_reg - .type int2_saved_reg, @object - .size int2_saved_reg, 4 -int2_saved_reg: - .zero 4 - -; --------------------------------------------- - .section .text, "ax",@progbits - - -reserved: - flag 1 ; Unexpected event, halt - -;##################### Interrupt Handling ############################## - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS -; --------------------------------------------- -; Level 2 ISR: Can interrupt a Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level2) - - INTERRUPT_PROLOGUE 2 - - ;------------------------------------------------------ - ; if L2 IRQ interrupted a L1 ISR, disable preemption - ; - ; This is to avoid a potential L1-L2-L1 scenario - ; -L1 IRQ taken - ; -L2 interrupts L1 (before L1 ISR could run) - ; -preemption off IRQ, user task in syscall picked to run - ; -RTIE to userspace - ; Returns from L2 context fine - ; But both L1 and L2 re-enabled, so another L1 can be taken - ; while prev L1 is still unserviced - ; - ;------------------------------------------------------ - - ; L2 interrupting L1 implies both L2 and L1 active - ; However both A2 and A1 are NOT set in STATUS32, thus - ; need to check STATUS32_L2 to determine if L1 was active - - ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - - ; bump thread_info->preempt_count (Disable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - add r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -1: - ;------------------------------------------------------ - ; setup params for Linux common ISR and invoke it - ;------------------------------------------------------ - lr r0, [icause2] - and r0, r0, 0x1f - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x2 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception - -END(handle_interrupt_level2) - -#endif - -; --------------------------------------------- -; User Mode Memory Bus Error Interrupt Handler -; (Kernel mode memory errors handled via separate exception vectors) -; --------------------------------------------- -ENTRY(mem_service) - - INTERRUPT_PROLOGUE 2 - - mov r0, ilink2 - mov r1, sp - - 
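mem_service here must drop out of the level-2 interrupt context before C code can queue the SIGBUS, which it does by clearing the A2 active flag, re-enabling both interrupt levels, and faking an RTIE, as the following instructions show. A sketch of that STATUS32 edit; the bit positions are assumptions for illustration, not authoritative ARC definitions.

```c
#include <stdint.h>

#define STATUS_A2_BIT  4              /* assumed position, illustrative */
#define STATUS_E1_MASK (1u << 1)      /* level-1 interrupt enable       */
#define STATUS_E2_MASK (1u << 2)      /* level-2 interrupt enable       */

static uint32_t drop_to_kernel_mode(uint32_t status32)
{
    status32 &= ~(1u << STATUS_A2_BIT);           /* bclr r3, r3, A2      */
    status32 |= STATUS_E1_MASK | STATUS_E2_MASK;  /* or r3, r3, E1|E2     */
    return status32;                              /* sr r3, [status32_l2] */
}
```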
; User process needs to be killed with SIGBUS, but first need to get - ; out of the L2 interrupt context (drop to pure kernel mode) and jump - ; off to "C" code where SIGBUS in enqueued - lr r3, [status32] - bclr r3, r3, STATUS_A2_BIT - or r3, r3, (STATUS_E1_MASK|STATUS_E2_MASK) - sr r3, [status32_l2] - mov ilink2, 1f - rtie -1: - bl do_memory_error - b ret_from_exception -END(mem_service) - -; --------------------------------------------- -; Level 1 ISR -; --------------------------------------------- -ENTRY(handle_interrupt_level1) - - INTERRUPT_PROLOGUE 1 - - lr r0, [icause1] - and r0, r0, 0x1f - -#ifdef CONFIG_TRACE_IRQFLAGS - ; icause1 needs to be read early, before calling tracing, which - ; can clobber scratch regs, hence use of stack to stash it - push r0 - TRACE_ASM_IRQ_DISABLE - pop r0 -#endif - - bl.d @arch_do_IRQ - mov r1, sp - - mov r8,0x1 - sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg - - b ret_from_exception -END(handle_interrupt_level1) - -;################### Non TLB Exception Handling ############################# - -; --------------------------------------------- -; Protection Violation Exception Handler -; --------------------------------------------- - -ENTRY(EV_TLBProtV) - - EXCEPTION_PROLOGUE - - mov r2, r10 ; ECR set into r10 already - lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above) - - ; Exception auto-disables further Intr/exceptions. - ; Re-enable them by pretending to return from exception - ; (so rest of handler executes in pure K mode) - - FAKE_RET_FROM_EXCPN - - mov r1, sp ; Handle to pt_regs - - ;------ (5) Type of Protection Violation? ---------- - ; - ; ProtV Hardware Exception is triggered for Access Faults of 2 types - ; -Access Violation : 00_23_(00|01|02|03)_00 - ; x r w r+w - ; -Unaligned Access : 00_23_04_00 - ; - bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f - - ;========= (6a) Access Violation Processing ======== - bl do_page_fault - b ret_from_exception - - ;========== (6b) Non aligned access ============ -4: - - SAVE_CALLEE_SAVED_USER - mov r2, sp ; callee_regs - - bl do_misaligned_access - - ; TBD: optimize - do this only if a callee reg was involved - ; either a dst of emulated LD/ST or src with address-writeback - RESTORE_CALLEE_SAVED_USER - - b ret_from_exception - -END(EV_TLBProtV) - -; Wrapper for Linux page fault handler called from EV_TLBMiss* -; Very similar to ProtV handler case (6a) above, but avoids the extra checks -; for Misaligned access -; -ENTRY(call_do_page_fault) - - EXCEPTION_PROLOGUE - lr r0, [efa] ; Faulting Data address - mov r1, sp - FAKE_RET_FROM_EXCPN - - mov blink, ret_from_exception - b do_page_fault - -END(call_do_page_fault) - -;############# Common Handlers for ARCompact and ARCv2 ############## - -#include "entry.S" - -;############# Return from Intr/Excp/Trap (ARC Specifics) ############## -; -; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap) -; IRQ shd definitely not happen between now and rtie -; All 2 entry points to here already disable interrupts - -.Lrestore_regs: - - # Interrupts are actually disabled from this point on, but will get - # reenabled after we return from interrupt/exception. - # But irq tracer needs to be told now... - TRACE_ASM_IRQ_ENABLE - - lr r10, [status32] - - ; Restore REG File. In case multiple Events outstanding, - ; use the same priority as rtie: EXCPN, L2 IRQ, L1 IRQ, None - ; Note that we use realtime STATUS32 (not pt_regs->status32) to - ; decide that. 
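The test just below reads the realtime STATUS32, where A1/A2 record which interrupt levels are still active, and picks the unwind path in the same priority order rtie would use. A sketch of that three-way choice; the mask values are illustrative stand-ins for the ARC definitions.

```c
#include <stdint.h>

#define STATUS_A1_MASK (1u << 3)  /* assumed positions, illustrative */
#define STATUS_A2_MASK (1u << 4)

enum ret_path { RET_EXCEPTION_OR_KERNEL, RET_L1_INTERRUPT, RET_L2_INTERRUPT };

static enum ret_path pick_compact_return(uint32_t status32)
{
    if (!(status32 & (STATUS_A1_MASK | STATUS_A2_MASK)))
        return RET_EXCEPTION_OR_KERNEL;  /* bz .Lexcep_or_pure_K_ret   */
    if (status32 & STATUS_A2_MASK)       /* bbit0 r10, STATUS_A2_BIT   */
        return RET_L2_INTERRUPT;         /* higher level unwinds first */
    return RET_L1_INTERRUPT;
}
```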
- - and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) - bz .Lexcep_or_pure_K_ret - - ; Returning from Interrupts (Level 1 or 2) - -#ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS - - ; Level 2 interrupt return Path - from hardware standpoint - bbit0 r10, STATUS_A2_BIT, not_level2_interrupt - - ;------------------------------------------------------------------ - ; However the context returning might not have taken L2 intr itself - ; e.g. Task 'A' user-code -> L2 intr -> schedule -> 'B' user-code ret - ; Special considerations needed for the context which took L2 intr - - ld r9, [sp, PT_event] ; Ensure this is L2 intr context - brne r9, event_IRQ2, 149f - - ;------------------------------------------------------------------ - ; if L2 IRQ interrupted an L1 ISR, we'd disabled preemption earlier - ; so that sched doesn't move to new task, causing L1 to be delayed - ; non-deterministically. Now that we've achieved that, let's reset - ; things to what they were, before returning from L2 context - ;---------------------------------------------------------------- - - ld r9, [sp, PT_status32] ; get status32_l2 (saved in pt_regs) - bbit0 r9, STATUS_A1_BIT, 149f ; L1 not active when L2 IRQ, so normal - - ; decrement thread_info->preempt_count (re-enable preemption) - GET_CURR_THR_INFO_FROM_SP r10 - ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] - - ; paranoid check: given A1 was active when A2 happened, preempt count - ; must not be 0 because we would have incremented it. - ; If this does happen we simply HALT as it means a BUG !!! - cmp r9, 0 - bnz 2f - flag 1 - -2: - sub r9, r9, 1 - st r9, [r10, THREAD_INFO_PREEMPT_COUNT] - -149: - INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt -debug_marker_l2: - rtie - -not_level2_interrupt: - -#endif - - INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt -debug_marker_l1: - rtie - -.Lexcep_or_pure_K_ret: - - ; this case is for syscalls or Exceptions or pure kernel mode - - EXCEPTION_EPILOGUE -debug_marker_syscall: - rtie - -END(ret_from_exception) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S deleted file mode 100644 index ea74a1eee5d9dfabed168720474e67a5089d3cef..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/entry.S +++ /dev/null @@ -1,358 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Common Low Level Interrupts/Traps/Exceptions (non-TLB) Handling for ARC - * (included from entry-<isa>.S) - * - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/*------------------------------------------------------------------ - * Function ABI - *------------------------------------------------------------------ - * - * Arguments r0 - r7 - * Caller Saved Registers r0 - r12 - * Callee Saved Registers r13 - r25 - * Global Pointer (gp) r26 - * Frame Pointer (fp) r27 - * Stack Pointer (sp) r28 - * Branch link register (blink) r31 - *------------------------------------------------------------------ - */ - -;################### Special Sys Call Wrappers ########################## - -ENTRY(sys_clone_wrapper) - SAVE_CALLEE_SAVED_USER - bl @sys_clone - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b .Lret_from_system_call -END(sys_clone_wrapper) - -ENTRY(ret_from_fork) - ; when the forked child comes here from the __switch_to function - ; r0 has the last task pointer.
- ; put last task in scheduler queue - jl @schedule_tail - - ld r9, [sp, PT_status32] - brne r9, 0, 1f - - jl.d [r14] ; kernel thread entry point - mov r0, r13 ; (see PF_KTHREAD block in copy_thread) - -1: - ; Return to user space - ; 1. Any forked task (Reach here via BRne above) - ; 2. First ever init task (Reach here via return from JL above) - ; This is the historic "kernel_execve" use-case, to return to init - ; user mode, in a round about way since that is always done from - ; a kernel thread which is executed via JL above but always returns - ; out whenever kernel_execve (now inline do_fork()) is involved - b ret_from_exception -END(ret_from_fork) - -;################### Non TLB Exception Handling ############################# - -; --------------------------------------------- -; Instruction Error Exception Handler -; --------------------------------------------- - -ENTRY(instr_service) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_insterror_or_kprobe - b ret_from_exception -END(instr_service) - -; --------------------------------------------- -; Machine Check Exception Handler -; --------------------------------------------- - -ENTRY(EV_MachineCheck) - - EXCEPTION_PROLOGUE - - lr r2, [ecr] - lr r0, [efa] - mov r1, sp - - ; hardware auto-disables MMU, re-enable it to allow kernel vaddr - ; access for say stack unwinding of modules for crash dumps - lr r3, [ARC_REG_PID] - or r3, r3, MMU_ENABLE - sr r3, [ARC_REG_PID] - - lsr r3, r2, 8 - bmsk r3, r3, 7 - brne r3, ECR_C_MCHK_DUP_TLB, 1f - - bl do_tlb_overlap_fault - b ret_from_exception - -1: - ; DEAD END: can't do much, display Regs and HALT - SAVE_CALLEE_SAVED_USER - - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - j do_machine_check_fault - -END(EV_MachineCheck) - -; --------------------------------------------- -; Privilege Violation Exception Handler -; --------------------------------------------- -ENTRY(EV_PrivilegeV) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_privilege_fault - b ret_from_exception -END(EV_PrivilegeV) - -; --------------------------------------------- -; Extension Instruction Exception Handler -; --------------------------------------------- -ENTRY(EV_Extension) - - EXCEPTION_PROLOGUE - - lr r0, [efa] - mov r1, sp - - FAKE_RET_FROM_EXCPN - - bl do_extension_fault - b ret_from_exception -END(EV_Extension) - -;################ Trap Handling (Syscall, Breakpoint) ################## - -; --------------------------------------------- -; syscall Tracing -; --------------------------------------------- -tracesys: - ; save EFA in case tracer wants the PC of traced task - ; using ERET won't work since next-PC has already committed - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 - st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address - - ; PRE Sys Call Ptrace hook - mov r0, sp ; pt_regs needed - bl @syscall_trace_entry - - ; Tracing code now returns the syscall num (orig or modif) - mov r8, r0 - - ; Do the Sys Call as we normally would. - ; Validate the Sys Call number - cmp r8, NR_syscalls - mov.hi r0, -ENOSYS - bhi tracesys_exit - - ; Restore the sys-call args. Mere invocation of the hook abv could have - ; clobbered them (since they are in scratch regs). 
The tracer could also - ; have deliberately changed the syscall args: r0-r7 - ld r0, [sp, PT_r0] - ld r1, [sp, PT_r1] - ld r2, [sp, PT_r2] - ld r3, [sp, PT_r3] - ld r4, [sp, PT_r4] - ld r5, [sp, PT_r5] - ld r6, [sp, PT_r6] - ld r7, [sp, PT_r7] - ld.as r9, [sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler - -tracesys_exit: - st r0, [sp, PT_r0] ; sys call return value in pt_regs - - ;POST Sys Call Ptrace Hook - bl @syscall_trace_exit - b ret_from_exception ; NOT ret_from_system_call as it saves r0, which - ; we'd already done before calling the post hook above - -; --------------------------------------------- -; Breakpoint TRAP -; --------------------------------------------- -trap_with_param: - mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc - mov r1, sp - - ; Save callee regs in case gdb wants to have a look - ; SP will grow up by size of CALLEE Reg-File - ; NOTE: clobbers r12 - SAVE_CALLEE_SAVED_USER - - ; save location of saved Callee Regs @ thread_struct->pc - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - ; Call the trap handler - bl do_non_swi_trap - - ; unwind stack to discard Callee saved Regs - DISCARD_CALLEE_SAVED_USER - - b ret_from_exception - -; --------------------------------------------- -; syscall TRAP -; ABI: (r0-r7) up to 8 args, (r8) syscall number -; --------------------------------------------- - -ENTRY(EV_Trap) - - EXCEPTION_PROLOGUE - - lr r12, [efa] - - FAKE_RET_FROM_EXCPN - - ;============ TRAP 1 : breakpoints - ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) - bmsk.f 0, r10, 7 - bnz trap_with_param - - ;============ TRAP (no param): syscall top level - - ; If syscall tracing ongoing, invoke pre-post-hooks - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys ; this never comes back - - ;============ Normal syscall case - - ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls - mov.hi r0, -ENOSYS - bhi .Lret_from_system_call - - ; Offset into the syscall_table and call handler - ld.as r9,[sys_call_table, r8] - jl [r9] ; Entry into Sys Call Handler - -.Lret_from_system_call: - - st r0, [sp, PT_r0] ; sys call return value in pt_regs - - ; fall through to ret_from_exception -END(EV_Trap) - -;############# Return from Intr/Excp/Trap (Linux Specifics) ############## -; -; If ret to user mode do we need to handle signals, schedule() et al. - -ENTRY(ret_from_exception) - - ; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32 - ld r8, [sp, PT_status32] ; returning to User/Kernel Mode - - bbit0 r8, STATUS_U_BIT, resume_kernel_mode - - ; Before returning to User mode check-for-and-complete any pending work - ; such as rescheduling/signal-delivery etc. -resume_user_mode_begin: - - ; Disable IRQs to ensure that chk for pending work itself is atomic - ; (and we don't end up missing a NEED_RESCHED/SIGPENDING due to an - ; interim IRQ).
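(The slow-path bookkeeping below is easier to follow as a C-flavoured loop. This is a sketch under the assumptions spelled out in the comments: the TIF_*/_TIF_* flags are the real ones tested by the assembly, while irqs_disable(), irqs_enable() and the regs variable are illustrative stand-ins.)

    /* Hedged sketch of the resume_user_mode_begin loop. */
    for (;;) {
            irqs_disable();                 /* make the flag check atomic */
            unsigned long flags = current_thread_info()->flags;
            if (!(flags & _TIF_WORK_MASK))
                    break;                  /* fast path: restore regs */
            if (flags & _TIF_NEED_RESCHED) {
                    schedule();             /* then re-check from the top */
                    continue;
            }
            irqs_enable();
            if (flags & _TIF_SIGPENDING)
                    do_signal(regs);        /* callee regs saved around it */
            else if (flags & _TIF_NOTIFY_RESUME)
                    do_notify_resume(regs);
    }
    /* falls through to .Lrestore_regs */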
- IRQ_DISABLE r10 - - ; Fast Path return to user mode if no pending work - GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, _TIF_WORK_MASK - bz .Lrestore_regs - - ; --- (Slow Path #1) task preemption --- - bbit0 r9, TIF_NEED_RESCHED, .Lchk_pend_signals - mov blink, resume_user_mode_begin ; tail-call to U mode ret chks - j @schedule ; BTST+Bnz causes relo error in link - -.Lchk_pend_signals: - IRQ_ENABLE r10 - - ; --- (Slow Path #2) pending signal --- - mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() - - GET_CURR_THR_INFO_FLAGS r9 - bbit0 r9, TIF_SIGPENDING, .Lchk_notify_resume - - ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs - ; in pt_reg since the "C" ABI (kernel code) will automatically - ; save/restore callee-saved regs. - ; - ; However, here we need to explicitly save callee regs because - ; (i) If this signal causes coredump - full regfile needed - ; (ii) If signal is SIGTRAP/SIGSTOP, task is being traced thus - ; tracer might call PEEKUSR(CALLEE reg) - ; - ; NOTE: SP will grow up by size of CALLEE Reg-File - SAVE_CALLEE_SAVED_USER ; clobbers r12 - - ; save location of saved Callee Regs @ thread_struct->callee - GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10 - st sp, [r10, THREAD_CALLEE_REG] - - bl @do_signal - - ; Ideally we want to discard the Callee reg above, however if this was - ; a tracing signal, tracer could have done a POKEUSR(CALLEE reg) - RESTORE_CALLEE_SAVED_USER - - b resume_user_mode_begin ; loop back to start of U mode ret - - ; --- (Slow Path #3) notify_resume --- -.Lchk_notify_resume: - btst r9, TIF_NOTIFY_RESUME - blnz @do_notify_resume - b resume_user_mode_begin ; unconditionally back to U mode ret chks - ; for single exit point from this block - -resume_kernel_mode: - - ; Disable Interrupts from this point on - ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() - ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe - IRQ_DISABLE r9 - -#ifdef CONFIG_PREEMPT - - ; Can't preempt if preemption disabled - GET_CURR_THR_INFO_FROM_SP r10 - ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] - brne r8, 0, .Lrestore_regs - - ; check if this task's NEED_RESCHED flag set - ld r9, [r10, THREAD_INFO_FLAGS] - bbit0 r9, TIF_NEED_RESCHED, .Lrestore_regs - - ; Invoke PREEMPTION - jl preempt_schedule_irq - - ; preempt_schedule_irq() always returns with IRQ disabled -#endif - - b .Lrestore_regs - -##### DONT ADD CODE HERE - .Lrestore_regs actually follows in entry-.S - diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S deleted file mode 100644 index 6f41265f62505cf6850c470a3dc09db1b7066b0c..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/head.S +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ARC CPU startup Code - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) - * - * Vineetg: Dec 2007 - * -Check if we are running on Simulator or on real hardware - * to skip certain things during boot on simulator - */ - -#include -#include -#include -#include -#include -#include - -.macro CPU_EARLY_SETUP - - ; Set up the Vector Table (in case an exception happens in early boot) - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] - - ; Disable I-cache/D-cache if kernel so configured - lr r5, [ARC_REG_IC_BCR] - breq r5, 0, 1f ; I$ doesn't exist - lr r5, [ARC_REG_IC_CTRL] -#ifdef CONFIG_ARC_HAS_ICACHE - bclr r5, r5, 0 ; 0 - Enable, 1 - Disable -#else - bset r5, r5, 0 ; I$ exists, but is not used -#endif - sr r5, [ARC_REG_IC_CTRL] - -1: - lr r5, [ARC_REG_DC_BCR] - breq r5, 0, 1f ; D$ doesn't exist - lr r5, [ARC_REG_DC_CTRL] - bclr r5, r5, 6 ; Invalidate (discard w/o wback) -#ifdef CONFIG_ARC_HAS_DCACHE - bclr r5, r5, 0 ; Enable (+Inv) -#else - bset r5, r5, 0 ; Disable (+Inv) -#endif - sr r5, [ARC_REG_DC_CTRL] - -1: - -#ifdef CONFIG_ISA_ARCV2 - ; Unaligned access is disabled at reset, so re-enable early as - ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access - ; by default - lr r5, [status32] -#ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS - bset r5, r5, STATUS_AD_BIT -#else - ; Although disabled at reset, bootloader might have enabled it - bclr r5, r5, STATUS_AD_BIT -#endif - kflag r5 -#endif -.endm - - .section .init.text, "ax",@progbits - -;---------------------------------------------------------------- -; Default Reset Handler (jumped into from Reset vector) -; - Don't clobber r0,r1,r2 as they might have u-boot provided args -; - Platforms can override this weak version if needed -;---------------------------------------------------------------- -WEAK(res_service) - j stext -END(res_service) - -;---------------------------------------------------------------- -; Kernel Entry point -;---------------------------------------------------------------- -ENTRY(stext) - - CPU_EARLY_SETUP - -#ifdef CONFIG_SMP - GET_CPU_ID r5 - cmp r5, 0 - mov.nz r0, r5 - bz .Lmaster_proceed - - ; Non-Masters wait for Master to boot enough and bring them up - ; when they resume, tail-call to entry point - mov blink, @first_lines_of_secondary - j arc_platform_smp_wait_to_boot - -.Lmaster_proceed: -#endif - - ; Clear BSS before updating any globals - ; XXX: use ZOL here - mov r5, __bss_start - sub r6, __bss_stop, r5 - lsr.f lp_count, r6, 2 - lpnz 1f - st.ab 0, [r5, 4] -1: - - ; Uboot - kernel ABI - ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 - ; r1 = magic number (always zero as of now) - ; r2 = pointer to uboot provided cmdline or external DTB in mem - ; These are handled later in handle_uboot_args() - st r0, [@uboot_tag] - st r1, [@uboot_magic] - st r2, [@uboot_arg] - - ; setup "current" tsk and optionally cache it in dedicated r25 - mov r9, @init_task - SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch - - ; setup stack (fp, sp) - mov fp, 0 - - ; tsk->thread_info is really a PAGE, whose bottom hosts the stack - GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base (output) - - j start_kernel ; "C" entry point -END(stext) - -#ifdef CONFIG_SMP -;---------------------------------------------------------------- -; First lines of code run by secondary before jumping to 'C' -;---------------------------------------------------------------- - .section .text, "ax",@progbits -ENTRY(first_lines_of_secondary) - - ; setup per-cpu idle task as "current" on this CPU - ld r0, [@secondary_idle_tsk] - SET_CURR_TASK_ON_CPU r0, r1 - - ; setup stack (fp, sp) - mov
fp, 0 - - ; set its stack base to tsk->thread_info bottom - GET_TSK_STACK_BASE r0, sp - - j start_kernel_secondary -END(first_lines_of_secondary) -#endif diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S deleted file mode 100644 index 6c693a9d29b6d79c3f09af1bb3176b41103e7b8f..0000000000000000000000000000000000000000 --- a/arch/arc/kernel/vmlinux.lds.S +++ /dev/null @@ -1,155 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include -#include -#include -#include - -OUTPUT_ARCH(arc) -ENTRY(res_service) - -#ifdef CONFIG_CPU_BIG_ENDIAN -jiffies = jiffies_64 + 4; -#else -jiffies = jiffies_64; -#endif - -SECTIONS -{ - /* - * ICCM starts at 0x8000_0000. So if kernel is relocated to some other - * address, make sure peripheral at 0x8z doesn't clash with ICCM - * Essentially vector is also in ICCM. - */ - - . = CONFIG_LINUX_LINK_BASE; - - _int_vec_base_lds = .; - .vector : { - *(.vector) - . = ALIGN(PAGE_SIZE); - } - -#ifdef CONFIG_ARC_HAS_ICCM - .text.arcfp : { - *(.text.arcfp) - . = ALIGN(CONFIG_ARC_ICCM_SZ * 1024); - } -#endif - - /* - * The reason for having a separate subsection .init.ramfs is to - * prevent objdump from including it in kernel dumps - * - * Reason for having .init.ramfs above .init is to make sure that the - * binary blob is tucked away to one side, reducing the displacement - * between .init.text and .text, avoiding any possible relocation - * errors because of calls from .init.text to .text - * Yes such calls do exist. e.g. - * decompress_inflate.c:gunzip( ) -> zlib_inflate_workspace( ) - */ - - __init_begin = .; - - .init.ramfs : { INIT_RAM_FS } - - . = ALIGN(PAGE_SIZE); - _stext = .; - - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(L1_CACHE_BYTES) - - /* INIT_DATA_SECTION open-coded: special INIT_RAM_FS handling */ - .init.data : { - INIT_DATA - INIT_SETUP(L1_CACHE_BYTES) - INIT_CALLS - CON_INITCALL - } - - .init.arch.info : { - __arch_info_begin = .; - *(.arch.info.init) - __arch_info_end = .; - } - - PERCPU_SECTION(L1_CACHE_BYTES) - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : { - _text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } - EXCEPTION_TABLE(L1_CACHE_BYTES) - _etext = .; - - _sdata = .; - RO_DATA_SECTION(PAGE_SIZE) - - /* - * 1. this is .data essentially - * 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned - */ - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - - _edata = .; - - BSS_SECTION(4, 4, 4) - -#ifdef CONFIG_ARC_DW2_UNWIND - . = ALIGN(PAGE_SIZE); - .eh_frame : { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#else - /DISCARD/ : { *(.eh_frame) } -#endif - - NOTES - - . = ALIGN(PAGE_SIZE); - _end = . ; - - STABS_DEBUG - DISCARDS - - .arcextmap 0 : { - *(.gnu.linkonce.arcextmap.*) - *(.arcextmap.*) - } - -#ifndef CONFIG_DEBUG_INFO - /DISCARD/ : { *(.debug_frame) } - /DISCARD/ : { *(.debug_aranges) } - /DISCARD/ : { *(.debug_pubnames) } - /DISCARD/ : { *(.debug_info) } - /DISCARD/ : { *(.debug_abbrev) } - /DISCARD/ : { *(.debug_line) } - /DISCARD/ : { *(.debug_str) } - /DISCARD/ : { *(.debug_loc) } - /DISCARD/ : { *(.debug_macinfo) } - /DISCARD/ : { *(.debug_ranges) } -#endif - -#ifdef CONFIG_ARC_HAS_DCCM - . = CONFIG_ARC_DCCM_BASE; - __arc_dccm_base = .; - .data.arcfp : { - *(.data.arcfp) - } - .
= ALIGN(CONFIG_ARC_DCCM_SZ * 1024); -#endif -} diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S deleted file mode 100644 index d6dc5e9bc49bfe1d9ae3c77f040f4b85a08afd33..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcmp.S +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -#ifdef __LITTLE_ENDIAN__ -#define WORD2 r2 -#define SHIFT r3 -#else /* BIG ENDIAN */ -#define WORD2 r3 -#define SHIFT r2 -#endif - -ENTRY_CFI(memcmp) - or r12,r0,r1 - asl_s r12,r12,30 - sub r3,r2,1 - brls r2,r12,.Lbytewise - ld r4,[r0,0] - ld r5,[r1,0] - lsr.f lp_count,r3,3 -#ifdef CONFIG_ISA_ARCV2 - /* In ARCv2 a branch can't be the last instruction in a zero overhead - * loop. - * So we move the branch to the start of the loop, duplicate it - * after the end, and set up r12 so that the branch isn't taken - * initially. - */ - mov_s r12,WORD2 - lpne .Loop_end - brne WORD2,r12,.Lodd - ld WORD2,[r0,4] -#else - lpne .Loop_end - ld_s WORD2,[r0,4] -#endif - ld_s r12,[r1,4] - brne r4,r5,.Leven - ld.a r4,[r0,8] - ld.a r5,[r1,8] -#ifdef CONFIG_ISA_ARCV2 -.Loop_end: - brne WORD2,r12,.Lodd -#else - brne WORD2,r12,.Lodd -.Loop_end: -#endif - asl_s SHIFT,SHIFT,3 - bhs_s .Last_cmp - brne r4,r5,.Leven - ld r4,[r0,4] - ld r5,[r1,4] -#ifdef __LITTLE_ENDIAN__ - nop_s - ; one more load latency cycle -.Last_cmp: - xor r0,r4,r5 - bset r0,r0,SHIFT - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - b.d .Leven_cmp - and r1,r1,24 -.Leven: - xor r0,r4,r5 - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - ; slow track insn - and r1,r1,24 -.Leven_cmp: - asl r2,r4,r1 - asl r12,r5,r1 - lsr_s r2,r2,1 - lsr_s r12,r12,1 - j_s.d [blink] - sub r0,r2,r12 - .balign 4 -.Lodd: - xor r0,WORD2,r12 - sub_s r1,r0,1 - bic_s r1,r1,r0 - norm r1,r1 - ; slow track insn - and r1,r1,24 - asl_s r2,r2,r1 - asl_s r12,r12,r1 - lsr_s r2,r2,1 - lsr_s r12,r12,1 - j_s.d [blink] - sub r0,r2,r12 -#else /* BIG ENDIAN */ -.Last_cmp: - neg_s SHIFT,SHIFT - lsr r4,r4,SHIFT - lsr r5,r5,SHIFT - ; slow track insn -.Leven: - sub.f r0,r4,r5 - mov.ne r0,1 - j_s.d [blink] - bset.cs r0,r0,31 -.Lodd: - cmp_s WORD2,r12 - mov_s r0,1 - j_s.d [blink] - bset.cs r0,r0,31 -#endif /* ENDIAN */ - .balign 4 -.Lbytewise: - breq r2,0,.Lnil - ldb r4,[r0,0] - ldb r5,[r1,0] - lsr.f lp_count,r3 -#ifdef CONFIG_ISA_ARCV2 - mov r12,r3 - lpne .Lbyte_end - brne r3,r12,.Lbyte_odd -#else - lpne .Lbyte_end -#endif - ldb_s r3,[r0,1] - ldb r12,[r1,1] - brne r4,r5,.Lbyte_even - ldb.a r4,[r0,2] - ldb.a r5,[r1,2] -#ifdef CONFIG_ISA_ARCV2 -.Lbyte_end: - brne r3,r12,.Lbyte_odd -#else - brne r3,r12,.Lbyte_odd -.Lbyte_end: -#endif - bcc .Lbyte_even - brne r4,r5,.Lbyte_even - ldb_s r3,[r0,1] - ldb_s r12,[r1,1] -.Lbyte_odd: - j_s.d [blink] - sub r0,r3,r12 -.Lbyte_even: - j_s.d [blink] - sub r0,r4,r5 -.Lnil: - j_s.d [blink] - mov r0,0 -END_CFI(memcmp) diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S deleted file mode 100644 index f2e239e219b2aad6c5cb8014d1c55551bf9fdecf..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-700.S +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) - */ - -#include - -ENTRY_CFI(memcpy) - or r3,r0,r1 - asl_s r3,r3,30 - mov_s r5,r0 - brls.d r2,r3,.Lcopy_bytewise - sub.f r3,r2,1 - ld_s r12,[r1,0] - asr.f lp_count,r3,3 - bbit0.d r3,2,.Lnox4 - bmsk_s r2,r2,1 - st.ab r12,[r5,4] - ld.a r12,[r1,4] -.Lnox4: - lppnz .Lendloop - ld_s r3,[r1,4] - st.ab r12,[r5,4] - ld.a r12,[r1,8] - st.ab r3,[r5,4] -.Lendloop: - breq r2,0,.Last_store - ld r3,[r5,0] -#ifdef __LITTLE_ENDIAN__ - add3 r2,-1,r2 - ; uses long immediate - xor_s r12,r12,r3 - bmsk r12,r12,r2 - xor_s r12,r12,r3 -#else /* BIG ENDIAN */ - sub3 r2,31,r2 - ; uses long immediate - xor_s r3,r3,r12 - bmsk r3,r3,r2 - xor_s r12,r12,r3 -#endif /* ENDIAN */ -.Last_store: - j_s.d [blink] - st r12,[r5,0] - - .balign 4 -.Lcopy_bytewise: - jcs [blink] - ldb_s r12,[r1,0] - lsr.f lp_count,r3 - bhs_s .Lnox1 - stb.ab r12,[r5,1] - ldb.a r12,[r1,1] -.Lnox1: - lppnz .Lendbloop - ldb_s r3,[r1,1] - stb.ab r12,[r5,1] - ldb.a r12,[r1,2] - stb.ab r3,[r5,1] -.Lendbloop: - j_s.d [blink] - stb r12,[r5,0] -END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S deleted file mode 100644 index 28993a73fdde637a3c0fb7c8f58a99d1ceb61120..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-archs-unaligned.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * ARCv2 memcpy implementation optimized for unaligned memory access using. - * - * Copyright (C) 2019 Synopsys - * Author: Eugeniy Paltsev - */ - -#include - -#ifdef CONFIG_ARC_HAS_LL64 -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY_CFI(memcpy) - mov r3, r0 ; don;t clobber ret val - - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1, 1] - stb.ab r5, [r3, 1] -.Lcopyremainingbytes: - - j [blink] -END_CFI(memcpy) diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S deleted file mode 100644 index 0051a84f60c0553bad763d53b1d86f331e6da491..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memcpy-archs.S +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. 
(www.synopsys.com) - */ - -#include - -#ifdef __LITTLE_ENDIAN__ -# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM -# define MERGE_2(RX,RY,IMM) -# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM -#else -# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 -#endif - -#ifdef CONFIG_ARC_HAS_LL64 -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY_CFI(memcpy) - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don;t clobber ret val - -;;; if size <= 8 - cmp r2, 8 - bls.d @.Lsmallchunk - mov.f lp_count, r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - ldb.ab r5, [r1,1] - sub r2, r2, 1 - stb.ab r5, [r3,1] -.Laligndestination: - -;;; Check the alignment of the source - and.f r4, r1, 0x03 - bnz.d @.Lsourceunaligned - -;;; CASE 0: Both source and destination are 32bit aligned -;;; Convert len to Dwords, unfold x4 - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes -.Lsmallchunk: - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1,1] - stb.ab r5, [r3,1] -.Lcopyremainingbytes: - - j [blink] -;;; END CASE 0 - -.Lsourceunaligned: - cmp r4, 2 - beq.d @.LunalignedOffby2 - sub r2, r2, 1 - - bhi.d @.LunalignedOffby3 - ldb.ab r5, [r1, 1] - -;;; CASE 1: The source is unaligned, off by 1 - ;; Hence I need to read 1 byte for a 16bit alignment - ;; and 2bytes to reach 32bit alignment - ldh.ab r6, [r1, 2] - sub r2, r2, 2 - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 - MERGE_1 (r6, r6, 8) - MERGE_2 (r5, r5, 24) - or r5, r5, r6 - - ;; Both src and dst are aligned - lpnz @.Lcopy8bytes_1 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 24) - or r7, r7, r5 - SHIFT_2 (r5, r6, 8) - - SHIFT_1 (r9, r8, 24) - or r9, r9, r5 - SHIFT_2 (r5, r8, 8) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_1: - - ;; Write back the remaining 16bits - EXTRACT_1 (r6, r5, 16) - sth.ab r6, [r3, 2] - ;; Write back the remaining 8bits - EXTRACT_2 (r5, r5, 16) - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_1 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_1: - j [blink] - -.LunalignedOffby2: -;;; CASE 2: The source is unaligned, off by 2 - ldh.ab r5, [r1, 2] - sub r2, r2, 1 - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.nz r5, r5, 16 -#endif - lpnz @.Lcopy8bytes_2 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 16) - or r7, r7, r5 - SHIFT_2 (r5, r6, 16) - - SHIFT_1 (r9, r8, 16) - or r9, r9, r5 - SHIFT_2 (r5, r8, 16) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_2: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 16 -#endif - sth.ab r5, 
[r3, 2] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_2 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_2: - j [blink] - -.LunalignedOffby3: -;;; CASE 3: The source is unaligned, off by 3 -;;; Hence, I need to read 1byte for achieve the 32bit alignment - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.ne r5, r5, 24 -#endif - lpnz @.Lcopy8bytes_3 - ;; LOOP START - ld.ab r6, [r1, 4] - ld.ab r8, [r1,4] - - SHIFT_1 (r7, r6, 8) - or r7, r7, r5 - SHIFT_2 (r5, r6, 24) - - SHIFT_1 (r9, r8, 8) - or r9, r9, r5 - SHIFT_2 (r5, r8, 24) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_3: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 24 -#endif - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_3 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_3: - j [blink] - -END_CFI(memcpy) diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S deleted file mode 100644 index d2e09fece5bcffdb556c02bbd7d91781e869a229..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memset-archs.S +++ /dev/null @@ -1,143 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - */ - -#include -#include - -/* - * The memset implementation below is optimized to use prefetchw and prealloc - * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) - * If you want to implement optimized memset for other possible L1 data cache - * line lengths (32B and 128B) you should rewrite code carefully checking - * we don't call any prefetchw/prealloc instruction for L1 cache lines which - * don't belongs to memset area. - */ - -#if L1_CACHE_SHIFT == 6 - -.macro PREALLOC_INSTR reg, off - prealloc [\reg, \off] -.endm - -.macro PREFETCHW_INSTR reg, off - prefetchw [\reg, \off] -.endm - -#else - -.macro PREALLOC_INSTR reg, off -.endm - -.macro PREFETCHW_INSTR reg, off -.endm - -#endif - -ENTRY_CFI(memset) - PREFETCHW_INSTR r0, 0 ; Prefetch the first write location - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don't clobber ret val - -;;; if length < 8 - brls.d.nt r2, 8, .Lsmallchunk - mov.f lp_count,r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - stb.ab r1, [r3,1] - sub r2, r2, 1 -.Laligndestination: - -;;; Destination is aligned - and r1, r1, 0xFF - asl r4, r1, 8 - or r4, r4, r1 - asl r5, r4, 16 - or r5, r5, r4 - mov r4, r5 - - sub3 lp_count, r2, 8 - cmp r2, 64 - bmsk.hi r2, r2, 5 - mov.ls lp_count, 0 - add3.hi r2, r2, 8 - -;;; Convert len to Dwords, unfold x8 - lsr.f lp_count, lp_count, 6 - - lpnz @.Lset64bytes - ;; LOOP START - PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching - -#ifdef CONFIG_ARC_HAS_LL64 - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset64bytes: - - lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes - lpnz .Lset32bytes - ;; LOOP START -#ifdef CONFIG_ARC_HAS_LL64 - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab 
r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset32bytes: - - and.f lp_count, r2, 0x1F ;Last remaining 31 bytes -.Lsmallchunk: - lpnz .Lcopy3bytes - ;; LOOP START - stb.ab r1, [r3, 1] -.Lcopy3bytes: - - j [blink] - -END_CFI(memset) - -ENTRY_CFI(memzero) - ; adjust bzero args to memset args - mov r2, r1 - b.d memset ;tail call so need to tinker with blink - mov r1, 0 -END_CFI(memzero) diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S deleted file mode 100644 index 9f35960da114182706158037dff960f400088d4a..0000000000000000000000000000000000000000 --- a/arch/arc/lib/memset.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ - -ENTRY_CFI(memset) - mov_s r4,r0 - or r12,r0,r2 - bmsk.f r12,r12,1 - extb_s r1,r1 - asl r3,r1,8 - beq.d .Laligned - or_s r1,r1,r3 - brls r2,SMALL,.Ltiny - add r3,r2,r0 - stb r1,[r3,-1] - bclr_s r3,r3,0 - stw r1,[r3,-2] - bmsk.f r12,r0,1 - add_s r2,r2,r12 - sub.ne r2,r2,4 - stb.ab r1,[r4,1] - and r4,r4,-2 - stw.ab r1,[r4,2] - and r4,r4,-4 -.Laligned: ; This code address should be aligned for speed. - asl r3,r1,16 - lsr.f lp_count,r2,2 - or_s r1,r1,r3 - lpne .Loop_end - st.ab r1,[r4,4] -.Loop_end: - j_s [blink] - - .balign 4 -.Ltiny: - mov.f lp_count,r2 - lpne .Ltiny_end - stb.ab r1,[r4,1] -.Ltiny_end: - j_s [blink] -END_CFI(memset) - -; memzero: @r0 = mem, @r1 = size_t -; memset: @r0 = mem, @r1 = char, @r2 = size_t - -ENTRY_CFI(memzero) - ; adjust bzero args to memset args - mov r2, r1 - mov r1, 0 - b memset ;tail call so need to tinker with blink -END_CFI(memzero) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S deleted file mode 100644 index d52e2833f9ed73ad702870bf400c16b26c0b5929..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strchr-700.S +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* ARC700 has a relatively long pipeline and branch prediction, so we want - to avoid branches that are hard to predict. On the other hand, the - presence of the norm instruction makes it easier to operate on whole - words branch-free. */ - -#include - -ENTRY_CFI(strchr) - extb_s r1,r1 - asl r5,r1,8 - bmsk r2,r0,1 - or r5,r5,r1 - mov_s r3,0x01010101 - breq.d r2,r0,.Laligned - asl r4,r5,16 - sub_s r0,r0,r2 - asl r7,r2,3 - ld_s r2,[r0] -#ifdef __LITTLE_ENDIAN__ - asl r7,r3,r7 -#else - lsr r7,r3,r7 -#endif - or r5,r5,r4 - ror r4,r3 - sub r12,r2,r7 - bic_s r12,r12,r2 - and r12,r12,r4 - brne.d r12,0,.Lfound0_ua - xor r6,r2,r5 - ld.a r2,[r0,4] - sub r12,r6,r7 - bic r12,r12,r6 -#ifdef __LITTLE_ENDIAN__ - and r7,r12,r4 - breq r7,0,.Loop ; For speed, we want this branch to be unaligned. - b .Lfound_char ; Likewise this one. -#else - and r12,r12,r4 - breq r12,0,.Loop ; For speed, we want this branch to be unaligned. - lsr_s r12,r12,7 - bic r2,r7,r6 - b.d .Lfound_char_b - and_s r2,r2,r12 -#endif -; /* We require this code address to be unaligned for speed... */ -.Laligned: - ld_s r2,[r0] - or r5,r5,r4 - ror r4,r3 -; /* ... so that this code address is aligned, for itself and ... 
*/ -.Loop: - sub r12,r2,r3 - bic_s r12,r12,r2 - and r12,r12,r4 - brne.d r12,0,.Lfound0 - xor r6,r2,r5 - ld.a r2,[r0,4] - sub r12,r6,r3 - bic r12,r12,r6 - and r7,r12,r4 - breq r7,0,.Loop /* ... so that this branch is unaligned. */ - ; Found searched-for character. r0 has already advanced to next word. -#ifdef __LITTLE_ENDIAN__ -/* We only need the information about the first matching byte - (i.e. the least significant matching byte) to be exact, - hence there is no problem with carry effects. */ -.Lfound_char: - sub r3,r7,1 - bic r3,r3,r7 - norm r2,r3 - sub_s r0,r0,1 - asr_s r2,r2,3 - j.d [blink] - sub_s r0,r0,r2 - - .balign 4 -.Lfound0_ua: - mov r3,r7 -.Lfound0: - sub r3,r6,r3 - bic r3,r3,r6 - and r2,r3,r4 - or_s r12,r12,r2 - sub_s r3,r12,1 - bic_s r3,r3,r12 - norm r3,r3 - add_s r0,r0,3 - asr_s r12,r3,3 - asl.f 0,r2,r3 - sub_s r0,r0,r12 - j_s.d [blink] - mov.pl r0,0 -#else /* BIG ENDIAN */ -.Lfound_char: - lsr r7,r7,7 - - bic r2,r7,r6 -.Lfound_char_b: - norm r2,r2 - sub_s r0,r0,4 - asr_s r2,r2,3 - j.d [blink] - add_s r0,r0,r2 - -.Lfound0_ua: - mov_s r3,r7 -.Lfound0: - asl_s r2,r2,7 - or r7,r6,r4 - bic_s r12,r12,r2 - sub r2,r7,r3 - or r2,r2,r6 - bic r12,r2,r12 - bic.f r3,r4,r12 - norm r3,r3 - - add.pl r3,r3,1 - asr_s r12,r3,3 - asl.f 0,r2,r3 - add_s r0,r0,r12 - j_s.d [blink] - mov.mi r0,0 -#endif /* ENDIAN */ -END_CFI(strchr) diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S deleted file mode 100644 index 7cffb37174408b2a43724c9892b80104c6c6a3b5..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcmp-archs.S +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -ENTRY_CFI(strcmp) - or r2, r0, r1 - bmsk_s r2, r2, 1 - brne r2, 0, @.Lcharloop - -;;; s1 and s2 are word aligned - ld.ab r2, [r0, 4] - - mov_s r12, 0x01010101 - ror r11, r12 - .align 4 -.LwordLoop: - ld.ab r3, [r1, 4] - ;; Detect NULL char in str1 - sub r4, r2, r12 - ld.ab r5, [r0, 4] - bic r4, r4, r2 - and r4, r4, r11 - brne.d.nt r4, 0, .LfoundNULL - ;; Check if the read locations are the same - cmp r2, r3 - beq.d .LwordLoop - mov.eq r2, r5 - - ;; A match is found, spot it out -#ifdef __LITTLE_ENDIAN__ - swape r3, r3 - mov_s r0, 1 - swape r2, r2 -#else - mov_s r0, 1 -#endif - cmp_s r2, r3 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.LfoundNULL: -#ifdef __BIG_ENDIAN__ - swape r4, r4 - swape r2, r2 - swape r3, r3 -#endif - ;; Find null byte - ffs r0, r4 - bmsk r2, r2, r0 - bmsk r3, r3, r0 - swape r2, r2 - swape r3, r3 - ;; make the return value - sub.f r0, r2, r3 - mov.hi r0, 1 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.Lcharloop: - ldb.ab r2, [r0, 1] - ldb.ab r3, [r1, 1] - nop - breq r2, 0, .Lcmpend - breq r2, r3, .Lcharloop - - .align 4 -.Lcmpend: - j_s.d [blink] - sub r0, r2, r3 -END_CFI(strcmp) diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S deleted file mode 100644 index b20c98fb3b2382003e87a1fb395cc6abfe65e642..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcmp.S +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* This is optimized primarily for the ARC700. 
- It would be possible to speed up the loops by one cycle / word - respectively one cycle / byte by forcing double source 1 alignment, unrolling - by a factor of two, and speculatively loading the second word / byte of - source 1; however, that would increase the overhead for loop setup / finish, - and strcmp might often terminate early. */ - -#include - -ENTRY_CFI(strcmp) - or r2,r0,r1 - bmsk_s r2,r2,1 - brne r2,0,.Lcharloop - mov_s r12,0x01010101 - ror r5,r12 -.Lwordloop: - ld.ab r2,[r0,4] - ld.ab r3,[r1,4] - nop_s - sub r4,r2,r12 - bic r4,r4,r2 - and r4,r4,r5 - brne r4,0,.Lfound0 - breq r2,r3,.Lwordloop -#ifdef __LITTLE_ENDIAN__ - xor r0,r2,r3 ; mask for difference - sub_s r1,r0,1 - bic_s r0,r0,r1 ; mask for least significant difference bit - sub r1,r5,r0 - xor r0,r5,r1 ; mask for least significant difference byte - and_s r2,r2,r0 - and_s r3,r3,r0 -#endif /* LITTLE ENDIAN */ - cmp_s r2,r3 - mov_s r0,1 - j_s.d [blink] - bset.lo r0,r0,31 - - .balign 4 -#ifdef __LITTLE_ENDIAN__ -.Lfound0: - xor r0,r2,r3 ; mask for difference - or r0,r0,r4 ; or in zero indicator - sub_s r1,r0,1 - bic_s r0,r0,r1 ; mask for least significant difference bit - sub r1,r5,r0 - xor r0,r5,r1 ; mask for least significant difference byte - and_s r2,r2,r0 - and_s r3,r3,r0 - sub.f r0,r2,r3 - mov.hi r0,1 - j_s.d [blink] - bset.lo r0,r0,31 -#else /* BIG ENDIAN */ - /* The zero-detection above can mis-detect 0x01 bytes as zeroes - because of carry-propagation from a less significant zero byte. - We can compensate for this by checking that bit0 is zero. - This compensation is not necessary in the step where we - get a low estimate for r2, because in any affected bytes - we already have 0x00 or 0x01, which will remain unchanged - when bit 7 is cleared. */ - .balign 4 -.Lfound0: - lsr r0,r4,8 - lsr_s r1,r2 - bic_s r2,r2,r0 ; get low estimate for r2 and get ... - bic_s r0,r0,r1 ; - or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... - cmp_s r3,r2 ; ... be independent of trailing garbage - or_s r2,r2,r0 ; likewise for r3 > r2 - bic_s r3,r3,r0 - rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 - cmp_s r2,r3 - j_s.d [blink] - bset.lo r0,r0,31 -#endif /* ENDIAN */ - - .balign 4 -.Lcharloop: - ldb.ab r2,[r0,1] - ldb.ab r3,[r1,1] - nop_s - breq r2,0,.Lcmpend - breq r2,r3,.Lcharloop -.Lcmpend: - j_s.d [blink] - sub r0,r2,r3 -END_CFI(strcmp) diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S deleted file mode 100644 index 6e2294d13e2f1f9105b6524c7b52950e20ed906e..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strcpy-700.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -/* If dst and src are 4 byte aligned, copy 8 bytes at a time. - If the src is 4, but not 8 byte aligned, we first read 4 bytes to get - it 8 byte aligned. Thus, we can do a little read-ahead, without - dereferencing a cache line that we should not touch. - Note that short and long instructions have been scheduled to avoid - branch stalls. - The beq_s to r3z could be made unaligned & long to avoid a stall - there, but it is not likely to be taken often, and it - would also be likely to cost an unaligned mispredict at the next call.
*/ - -#include - -ENTRY_CFI(strcpy) - or r2,r0,r1 - bmsk_s r2,r2,1 - brne.d r2,0,charloop - mov_s r10,r0 - ld_s r3,[r1,0] - mov r8,0x01010101 - bbit0.d r1,2,loop_start - ror r12,r8 - sub r2,r3,r8 - bic_s r2,r2,r3 - tst_s r2,r12 - bne r3z - mov_s r4,r3 - .balign 4 -loop: - ld.a r3,[r1,4] - st.ab r4,[r10,4] -loop_start: - ld.a r4,[r1,4] - sub r2,r3,r8 - bic_s r2,r2,r3 - tst_s r2,r12 - bne_s r3z - st.ab r3,[r10,4] - sub r2,r4,r8 - bic r2,r2,r4 - tst r2,r12 - beq loop - mov_s r3,r4 -#ifdef __LITTLE_ENDIAN__ -r3z: bmsk.f r1,r3,7 - lsr_s r3,r3,8 -#else -r3z: lsr.f r1,r3,24 - asl_s r3,r3,8 -#endif - bne.d r3z - stb.ab r1,[r10,1] - j_s [blink] - - .balign 4 -charloop: - ldb.ab r3,[r1,1] - - - brne.d r3,0,charloop - stb.ab r3,[r10,1] - j [blink] -END_CFI(strcpy) diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S deleted file mode 100644 index dae428ceb87af8449f1ad1e7bedcd8365cd8a64d..0000000000000000000000000000000000000000 --- a/arch/arc/lib/strlen.S +++ /dev/null @@ -1,80 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#include - -ENTRY_CFI(strlen) - or r3,r0,7 - ld r2,[r3,-7] - ld.a r6,[r3,-3] - mov r4,0x01010101 - ; uses long immediate -#ifdef __LITTLE_ENDIAN__ - asl_s r1,r0,3 - btst_s r0,2 - asl r7,r4,r1 - ror r5,r4 - sub r1,r2,r7 - bic_s r1,r1,r2 - mov.eq r7,r4 - sub r12,r6,r7 - bic r12,r12,r6 - or.eq r12,r12,r1 - and r12,r12,r5 - brne r12,0,.Learly_end -#else /* BIG ENDIAN */ - ror r5,r4 - btst_s r0,2 - mov_s r1,31 - sub3 r7,r1,r0 - sub r1,r2,r4 - bic_s r1,r1,r2 - bmsk r1,r1,r7 - sub r12,r6,r4 - bic r12,r12,r6 - bmsk.ne r12,r12,r7 - or.eq r12,r12,r1 - and r12,r12,r5 - brne r12,0,.Learly_end -#endif /* ENDIAN */ - -.Loop: - ld_s r2,[r3,4] - ld.a r6,[r3,8] - ; stall for load result - sub r1,r2,r4 - bic_s r1,r1,r2 - sub r12,r6,r4 - bic r12,r12,r6 - or r12,r12,r1 - and r12,r12,r5 - breq r12,0,.Loop -.Lend: - and.f r1,r1,r5 - sub.ne r3,r3,4 - mov.eq r1,r12 -#ifdef __LITTLE_ENDIAN__ - sub_s r2,r1,1 - bic_s r2,r2,r1 - norm r1,r2 - sub_s r0,r0,3 - lsr_s r1,r1,3 - sub r0,r3,r0 - j_s.d [blink] - sub r0,r0,r1 -#else /* BIG ENDIAN */ - lsr_s r1,r1,7 - mov.eq r2,r6 - bic_s r1,r1,r2 - norm r1,r1 - sub r0,r3,r0 - lsr_s r1,r1,3 - j_s.d [blink] - add r0,r0,r1 -#endif /* ENDIAN */ -.Learly_end: - b.d .Lend - sub_s.ne r1,r1,r1 -END_CFI(strlen) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S deleted file mode 100644 index c55d95dd2f3949f0fae10451ce8e02691ca03d9a..0000000000000000000000000000000000000000 --- a/arch/arc/mm/tlbex.S +++ /dev/null @@ -1,413 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TLB Exception Handling for ARC - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * Vineetg: April 2011 : - * -MMU v1: moved out legacy code into a separate file - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, - * helps avoid a shift when preparing PD0 from PTE - * - * Vineetg: July 2009 - * -For MMU V2, we need not do heuristics at the time of committing a D-TLB - * entry, so that it doesn't knock out its I-TLB entry - * -Some more fine tuning: - * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc - * - * Vineetg: July 2009 - * -Practically rewrote the I/D TLB Miss handlers - * Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
- * Hence Leaner by 1.5 K - * Used Conditional arithmetic to replace excessive branching - * Also used short instructions wherever possible - * - * Vineetg: Aug 13th 2008 - * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing - * more information in case of a Fatality - * - * Vineetg: March 25th Bug #92690 - * -Added Debug Code to check if sw-ASID == hw-ASID - - * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_ISA_ARCOMPACT -;----------------------------------------------------------------- -; ARC700 Exception Handling doesn't auto-switch stack and it only provides -; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" -; -; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a -; "global" is used to free-up FIRST core reg to be able to code the rest of -; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). -; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 -; need to be saved as well by extending the "global" to be 4 words. Hence -; ".size ex_saved_reg1, 16" -; [All of this dance is to avoid stack switching for each TLB Miss, since we -; only need to save only a handful of regs, as opposed to complete reg file] -; -; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST -; core reg as it will not be SMP safe. -; Thus scratch AUX reg is used (and no longer used to cache task PGD). -; To save the rest of 3 regs - per cpu, the global is made "per-cpu". -; Epilogue thus has to locate the "per-cpu" storage for regs. -; To avoid cache line bouncing the per-cpu global is aligned/sized per -; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence -; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" - -; As simple as that.... 
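(Concretely, the per-CPU slot addressing described above works out to roughly the following C sketch; ex_saved_reg1 and L1_CACHE_SHIFT are the real symbols, while the smp_processor_id() indexing shown here is an illustrative stand-in for the GET_CPU_ID macro.)

    /* Hedged sketch: each CPU gets a cache-line-sized stash inside
     * ex_saved_reg1, so the TLB-miss fast path never touches the stack. */
    u32 *slot = (u32 *)((char *)ex_saved_reg1 +
                        (smp_processor_id() << L1_CACHE_SHIFT));
    /* r0 itself goes to the SCRATCH_DATA0 aux reg; r1-r3 go here */
    slot[1] = r1;
    slot[2] = r2;
    slot[3] = r3;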
-;-------------------------------------------------------------------------- - -; scratch memory to save [r0-r3] used to code TLB refill Handler -ARCFP_DATA ex_saved_reg1 - .align 1 << L1_CACHE_SHIFT - .type ex_saved_reg1, @object -#ifdef CONFIG_SMP - .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) -ex_saved_reg1: - .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) -#else - .size ex_saved_reg1, 16 -ex_saved_reg1: - .zero 16 -#endif - -.macro TLBMISS_FREEUP_REGS -#ifdef CONFIG_SMP - sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with - GET_CPU_ID r0 ; get to per cpu scratch mem, - asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu - add r0, @ex_saved_reg1, r0 -#else - st r0, [@ex_saved_reg1] - mov_s r0, @ex_saved_reg1 -#endif - st_s r1, [r0, 4] - st_s r2, [r0, 8] - st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; the one in Linux data structures - - tlb_paranoid_check_asm -.endm - -.macro TLBMISS_RESTORE_REGS -#ifdef CONFIG_SMP - GET_CPU_ID r0 ; get to per cpu scratch mem - asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide - add r0, @ex_saved_reg1, r0 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - lr r0, [ARC_REG_SCRATCH_DATA0] -#else - mov_s r0, @ex_saved_reg1 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - ld_s r0, [r0] -#endif -.endm - -#else /* ARCv2 */ - -.macro TLBMISS_FREEUP_REGS - PUSH r0 - PUSH r1 - PUSH r2 - PUSH r3 -.endm - -.macro TLBMISS_RESTORE_REGS - POP r3 - POP r2 - POP r1 - POP r0 -.endm - -#endif - -;============================================================================ -; Troubleshooting Stuff -;============================================================================ - -; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid -; When Creating TLB Entries, instead of doing 3 dependent loads from memory, -; we use the MMU PID Reg to get current ASID. -; In bizarre scenarios SW and HW ASID can get out-of-sync, which is trouble. -; So we try to detect this in the TLB Miss handler - -.macro tlb_paranoid_check_asm - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - - GET_CURR_TASK_ON_CPU r3 - ld r0, [r3, TASK_ACT_MM] - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] - breq r0, 0, 55f ; Error if no ASID allocated - - lr r1, [ARC_REG_PID] - and r1, r1, 0xFF - - and r2, r0, 0xFF ; MMU PID bits only for comparison - breq r1, r2, 5f - -55: - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r2, [erstatus] - bbit0 r2, STATUS_U_BIT, 5f - - ; We sure are in troubled waters; flag the error, but to do so - ; need to switch to kernel mode stack to call error routine - GET_TSK_STACK_BASE r3, sp - - ; Call printk to shout out loud - mov r2, 1 - j print_asid_mismatch - -5: ; ASIDs match so proceed normally - nop - -#endif - -.endm - -;============================================================================ -;TLB Miss handling Code -;============================================================================ - -;----------------------------------------------------------------------------- -; This macro does the page-table lookup for the faulting address.
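(Equivalently, in C: a sketch of the walk the macro performs. PGDIR_SHIFT, PAGE_SHIFT, PAGE_MASK and PTRS_PER_PTE are the real kernel constants; read_efa(), current_pgd() and the local variables are illustrative stand-ins. The assembly avoids the implied multiply by the PTE size by simply shifting less in step (1) of the comments below.)

    /* Hedged sketch of the two-level lookup coded in LOAD_FAULT_PTE. */
    unsigned long vaddr = read_efa();                /* faulting address */
    unsigned long *pgd  = current_pgd();             /* SCRATCH_DATA0 or mm->pgd */
    unsigned long pgd_ent = pgd[vaddr >> PGDIR_SHIFT];
    if (!pgd_ent)
            return do_slow_path_pf();                /* no page table: fault */
    {
            unsigned long *ptbl = (unsigned long *)(pgd_ent & PAGE_MASK);
            unsigned long pte = ptbl[(vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)];
            /* pte lands in r0, &ptbl[index] in r1, vaddr in r2 */
    }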
-; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address -.macro LOAD_FAULT_PTE - - lr r2, [efa] - -#ifndef CONFIG_SMP - lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd -#else - GET_CURR_TASK_ON_CPU r1 - ld r1, [r1, TASK_ACT_MM] - ld r1, [r1, MM_PGD] -#endif - - lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr - tst r3, r3 - bz do_slow_path_pf ; if no Page Table, do page fault - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) - add2.nz r1, r1, r0 - bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk - mov.nz r0, r3 - -#endif - and r1, r3, PAGE_MASK - - ; Get the PTE entry: The idea is - ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr - ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = (pgtbl + y * 4) - -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ -#else -#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ -#endif - - ; multiply in step (3) above avoided by shifting lesser in step (1) - lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) - and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) - ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) - ; r1: PTE ptr - -2: - -.endm - -;----------------------------------------------------------------- -; Convert Linux PTE entry into TLB entry -; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu -; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) -; IN: r0 = PTE, r1 = ptr to PTE - -.macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) - and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) - - and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE - or r3, r3, r2 - - sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C -#ifdef CONFIG_ARC_HAS_PAE40 - ld r3, [r1, 4] ; paddr[39..32] - sr r3, [ARC_REG_TLBPD1HI] -#endif - - and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb - - lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid - - or r3, r3, r2 ; S | vaddr | {sasid|asid} - sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 -.endm - -;----------------------------------------------------------------- -; Commit the TLB entry into MMU - -.macro COMMIT_ENTRY_TO_MMU -#if (CONFIG_ARC_MMU_VER < 4) - -#ifdef CONFIG_EZNPS_MTM_EXT - /* verify if entry for this vaddr+ASID already exists */ - sr TLBProbe, [ARC_REG_TLBCOMMAND] - lr r0, [ARC_REG_TLBINDEX] - bbit0 r0, 31, 88f -#endif - - /* Get free TLB slot: Set = computed from vaddr, way = random */ - sr TLBGetIndex, [ARC_REG_TLBCOMMAND] - - /* Commit the Write */ -#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ - sr TLBWriteNI, [ARC_REG_TLBCOMMAND] -#else - sr TLBWrite, [ARC_REG_TLBCOMMAND] -#endif - -#else - sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] -#endif - -88: -.endm - - -ARCFP_CODE ;Fast Path Code, candidate for ICCM - -;----------------------------------------------------------------------------- -; I-TLB Miss Exception Handler -;----------------------------------------------------------------------------- - -ENTRY(EV_TLBMissI) - - TLBMISS_FREEUP_REGS - - ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA - LOAD_FAULT_PTE - - ;---------------------------------------------------------------- - ; VERIFY_PTE: Check if PTE permissions approp for executing code - cmp_s r2, VMALLOC_START - mov_s r2, 
(_PAGE_PRESENT | _PAGE_EXECUTE) - or.hs r2, r2, _PAGE_GLOBAL - - and r3, r0, r2 ; Mask out NON Flag bits from PTE - xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) - bnz do_slow_path_pf - - ; Let Linux VM know that the page was accessed - or r0, r0, _PAGE_ACCESSED ; set Accessed Bit - st_s r0, [r1] ; Write back PTE - - CONV_PTE_TO_TLB - COMMIT_ENTRY_TO_MMU - TLBMISS_RESTORE_REGS -EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation - rtie - -END(EV_TLBMissI) - -;----------------------------------------------------------------------------- -; D-TLB Miss Exception Handler -;----------------------------------------------------------------------------- - -ENTRY(EV_TLBMissD) - - TLBMISS_FREEUP_REGS - - ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed - ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA - LOAD_FAULT_PTE - - ;---------------------------------------------------------------- - ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) - - cmp_s r2, VMALLOC_START - mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE - or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only - - ; Linux PTE [RWX] bits are semantically overloaded: - ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) - ; -Otherwise they are user-mode permissions, and those are exactly - ; same for kernel mode as well (e.g. copy_(to|from)_user) - - lr r3, [ecr] - btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access - or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE - btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access - or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE - ; Above laddering takes care of XCHG access (both R and W) - - ; By now, r2 setup with all the Flags we need to check in PTE - and r3, r0, r2 ; Mask out NON Flag bits from PTE - brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) - - ;---------------------------------------------------------------- - ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty - lr r3, [ecr] - or r0, r0, _PAGE_ACCESSED ; Accessed bit always - btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well - st_s r0, [r1] ; Write back PTE - - CONV_PTE_TO_TLB - -#if (CONFIG_ARC_MMU_VER == 1) - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. - ; But only for old MMU or one with Metal Fix - TLB_WRITE_HEURISTICS -#endif - - COMMIT_ENTRY_TO_MMU - TLBMISS_RESTORE_REGS -EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation - rtie - -;-------- Common routine to call Linux Page Fault Handler ----------- -do_slow_path_pf: - -#ifdef CONFIG_ISA_ARCV2 - ; Set Z flag if exception in U mode. Hardware micro-ops do this on any - ; taken interrupt/exception, and thus is already the case at the entry - ; above, but ensuing code would have already clobbered. - ; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set - - lr r2, [erstatus] - and r2, r2, STATUS_U_MASK - bxor.f 0, r2, STATUS_U_BIT -#endif - - ; Restore the 4-scratch regs saved by fast path miss handler - TLBMISS_RESTORE_REGS - - ; Slow path TLB Miss handled as a regular ARC Exception - ; (stack switching / save the complete reg-file). 
- b call_do_page_fault
-END(EV_TLBMissD)
diff --git a/arch/arc/plat-eznps/entry.S b/arch/arc/plat-eznps/entry.S
deleted file mode 100644
index 3f18c0108e7287b76b73080095d24a0437a82a74..0000000000000000000000000000000000000000
--- a/arch/arc/plat-eznps/entry.S
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*******************************************************************************
-
- EZNPS CPU startup Code
- Copyright(c) 2012 EZchip Technologies.
-
-
-*******************************************************************************/
-#include
-#include
-#include
-#include
-
- .cpu A7
-
- .section .init.text, "ax",@progbits
- .align 1024 ; HW requirement for restart first PC
-
-ENTRY(res_service)
-#if defined(CONFIG_EZNPS_MTM_EXT) && defined(CONFIG_EZNPS_SHARED_AUX_REGS)
- ; There is no work for HW thread id != 0
- lr r3, [CTOP_AUX_THREAD_ID]
- cmp r3, 0
- jne stext
-#endif
-
-#ifdef CONFIG_ARC_HAS_DCACHE
- ; With no cache coherency mechanism, D$ needs to be used very carefully.
- ; Address space:
- ; 0G-2G: We disable CONFIG_ARC_CACHE_PAGES.
- ; 2G-3G: We disable D$ by setting this bit.
- ; 3G-4G: D$ is disabled by architecture.
- ; FMT huge pages for user applications reside at 0-2G.
- ; Only FMT pages are left to use D$; each such page has a
- ; disable/enable bit for cacheability.
- ; Programmers will use FMT pages for private data, so cache coherency
- ; would not be a problem.
- ; First thing we do is invalidate D$
- sr 1, [ARC_REG_DC_IVDC]
- sr HW_COMPLY_KRN_NOT_D_CACHED, [CTOP_AUX_HW_COMPLY]
-#endif
-
-#ifdef CONFIG_SMP
- ; We set the logical cpuid to be used by GET_CPUID.
- ; We do not use the physical cpuid since we want ids to be contiguous
- ; across cpus on the same quad cluster.
- ; This is useful for applications that use shared resources of a quad
- ; cluster, such as SRAMs.
- lr r3, [CTOP_AUX_CORE_ID]
- sr r3, [CTOP_AUX_LOGIC_CORE_ID]
- lr r3, [CTOP_AUX_CLUSTER_ID]
- ; The logical id is achieved by swapping the 2 middle bits of the
- ; cluster id (4 bits).
- ; r3 is used since we use a short instruction and need a q-class reg
- .short CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST
- .word CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM
- sr r3, [CTOP_AUX_LOGIC_CLUSTER_ID]
-#endif
-
- j stext
-END(res_service)
diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S
deleted file mode 100644
index 5c476bd2b4ce9c9bf9c7a37b684e20ddf46135f3..0000000000000000000000000000000000000000
--- a/arch/arm/boot/bootp/init.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/boot/bootp/init.S
- *
- * Copyright (C) 2000-2003 Russell King.
- *
- * "Header" file for splitting kernel + initrd. Note that we pass
- * r0 through to r3 straight through.
- *
- * This demonstrates how to append code to the start of the kernel
- * zImage, and boot the kernel without copying it around. This
- * example would be simpler if we didn't have an object of unknown
- * size immediately following the kernel: then we could build this
- * into a binary blob and concatenate the zImage using the cat command.
- */
- .section .start,#alloc,#execinstr
- .type _start, #function
- .globl _start
-
-_start: add lr, pc, #-0x8 @ lr = current load addr
- adr r13, data
- ldmia r13!, {r4-r6} @ r5 = dest, r6 = length
- add r4, r4, lr @ r4 = initrd_start + load addr
- bl move @ move the initrd
-
-/*
- * Setup the initrd parameters to pass to the kernel. This can only be
- * passed in via the tagged list.
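
What the tagged list referred to here looks like may be easier to follow in C than in the register-level walk that follows: verify (or synthesize) the leading ATAG_CORE header, advance to the zero-length terminator, then append an ATAG_INITRD2 entry and re-terminate. The tag constants are the documented boot-protocol values; the helper itself is a sketch, not kernel code.

#include <stdint.h>

#define ATAG_CORE    0x54410001u
#define ATAG_INITRD2 0x54420005u

struct atag_hdr {
    uint32_t size;   /* tag length in 32-bit words, header included */
    uint32_t tag;
};

static void append_initrd2(uint32_t *params, uint32_t start, uint32_t bytes)
{
    struct atag_hdr *t = (struct atag_hdr *)params;

    if (t->tag != ATAG_CORE) {       /* no valid list: make a dummy one */
        t->size = 2;                 /* header-only ATAG_CORE */
        t->tag  = ATAG_CORE;
        params[2] = 0;               /* zero-length terminator after it */
    }

    while (t->size != 0)             /* walk to the terminator */
        t = (struct atag_hdr *)((uint32_t *)t + t->size);

    t->size = 4;                     /* overwrite it with ATAG_INITRD2 */
    t->tag  = ATAG_INITRD2;
    ((uint32_t *)t)[2] = start;
    ((uint32_t *)t)[3] = bytes;
    ((uint32_t *)t)[4] = 0;          /* and re-terminate the list */
}
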
- */ - ldmia r13, {r5-r9} @ get size and addr of initrd - @ r5 = ATAG_CORE - @ r6 = ATAG_INITRD2 - @ r7 = initrd start - @ r8 = initrd end - @ r9 = param_struct address - - ldr r10, [r9, #4] @ get first tag - teq r10, r5 @ is it ATAG_CORE? -/* - * If we didn't find a valid tag list, create a dummy ATAG_CORE entry. - */ - movne r10, #0 @ terminator - movne r4, #2 @ Size of this entry (2 words) - stmiane r9, {r4, r5, r10} @ Size, ATAG_CORE, terminator - -/* - * find the end of the tag list, and then add an INITRD tag on the end. - * If there is already an INITRD tag, then we ignore it; the last INITRD - * tag takes precedence. - */ -taglist: ldr r10, [r9, #0] @ tag length - teq r10, #0 @ last tag (zero length)? - addne r9, r9, r10, lsl #2 - bne taglist - - mov r5, #4 @ Size of initrd tag (4 words) - stmia r9, {r5, r6, r7, r8, r10} - b kernel_start @ call kernel - -/* - * Move the block of memory length r6 from address r4 to address r5 - */ -move: ldmia r4!, {r7 - r10} @ move 32-bytes at a time - stmia r5!, {r7 - r10} - ldmia r4!, {r7 - r10} - stmia r5!, {r7 - r10} - subs r6, r6, #8 * 4 - bcs move - mov pc, lr - - .size _start, . - _start - - .align - - .type data,#object -data: .word initrd_start @ source initrd address - .word initrd_phys @ destination initrd address - .word initrd_size @ initrd size - - .word 0x54410001 @ r5 = ATAG_CORE - .word 0x54420005 @ r6 = ATAG_INITRD2 - .word initrd_phys @ r7 - .word initrd_size @ r8 - .word params_phys @ r9 - .size data, . - data diff --git a/arch/arm/boot/bootp/initrd.S b/arch/arm/boot/bootp/initrd.S deleted file mode 100644 index dd3d04971c42123581569003992286fe03e0e9ee..0000000000000000000000000000000000000000 --- a/arch/arm/boot/bootp/initrd.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .type initrd_start,#object - .globl initrd_start -initrd_start: - .incbin INITRD - .globl initrd_end -initrd_end: diff --git a/arch/arm/boot/bootp/kernel.S b/arch/arm/boot/bootp/kernel.S deleted file mode 100644 index dc6236c173d241267021eed2245434e2826ad077..0000000000000000000000000000000000000000 --- a/arch/arm/boot/bootp/kernel.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .globl kernel_start -kernel_start: - .incbin "arch/arm/boot/zImage" - .globl kernel_end -kernel_end: - .align 2 diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S deleted file mode 100644 index 88e2a88d324b2535a6610c1a914e81ef3a010b8b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/big-endian.S +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/big-endian.S - * - * Switch CPU into big endian mode. 
- * Author: Nicolas Pitre - */ - - .section ".start", #alloc, #execinstr - - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #(1 << 7) @ enable big endian mode - mcr p15, 0, r0, c1, c0, 0 @ write control reg - diff --git a/arch/arm/boot/compressed/debug.S b/arch/arm/boot/compressed/debug.S deleted file mode 100644 index 6bf2917a46214f09215e35debe7ddfdc5df4d5d3..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/debug.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#ifndef CONFIG_DEBUG_SEMIHOSTING - -#include CONFIG_DEBUG_LL_INCLUDE - -ENTRY(putc) - addruart r1, r2, r3 - waituart r3, r1 - senduart r0, r1 - busyuart r3, r1 - mov pc, lr -ENDPROC(putc) - -#else - -ENTRY(putc) - adr r1, 1f - ldmia r1, {r2, r3} - add r2, r2, r1 - ldr r1, [r2, r3] - strb r0, [r1] - mov r0, #0x03 @ SYS_WRITEC - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - mov pc, lr - .align 2 -1: .word _GLOBAL_OFFSET_TABLE_ - . - .word semi_writec_buf(GOT) -ENDPROC(putc) - - .bss - .global semi_writec_buf - .type semi_writec_buf, %object -semi_writec_buf: - .space 4 - .size semi_writec_buf, 4 - -#endif diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S deleted file mode 100644 index a5983588f96b8cf8847edf5ea74fe798c7f6c74b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/efi-header.S +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013-2017 Linaro Ltd - * Authors: Roy Franz - * Ard Biesheuvel - */ - -#include -#include - - .macro __nop -#ifdef CONFIG_EFI_STUB - @ This is almost but not quite a NOP, since it does clobber the - @ condition flags. But it is the best we can do for EFI, since - @ PE/COFF expects the magic string "MZ" at offset 0, while the - @ ARM/Linux boot protocol expects an executable instruction - @ there. - .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000 -#else - AR_CLASS( mov r0, r0 ) - M_CLASS( nop.w ) -#endif - .endm - - .macro __EFI_HEADER -#ifdef CONFIG_EFI_STUB - .set start_offset, __efi_start - start - .org start + 0x3c - @ - @ The PE header can be anywhere in the file, but for - @ simplicity we keep it together with the MSDOS header - @ The offset to the PE/COFF header needs to be at offset - @ 0x3C in the MSDOS header. - @ The only 2 fields of the MSDOS header that are used are this - @ PE/COFF offset, and the "MZ" bytes at offset 0x0. - @ - .long pe_header - start @ Offset to the PE header. 
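
The comment above is the entire contract of the MSDOS stub: bytes 0 and 1 spell "MZ", and the 32-bit word at offset 0x3c points at the PE/COFF header. A hypothetical loader-side sketch in C (PE fields are little-endian, and this assumes a little-endian host; bounds checks kept minimal):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const uint8_t *find_pe_header(const uint8_t *image, size_t len)
{
    uint32_t e_lfanew;

    if (len < 0x40 || image[0] != 'M' || image[1] != 'Z')
        return NULL;                          /* no MSDOS signature */

    memcpy(&e_lfanew, image + 0x3c, 4);       /* offset to PE header */
    if (e_lfanew > len - 4)
        return NULL;

    if (memcmp(image + e_lfanew, "PE\0\0", 4) != 0)
        return NULL;                          /* bad PE magic */

    return image + e_lfanew;
}
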
- -pe_header: - .long PE_MAGIC - -coff_header: - .short IMAGE_FILE_MACHINE_THUMB @ Machine - .short section_count @ NumberOfSections - .long 0 @ TimeDateStamp - .long 0 @ PointerToSymbolTable - .long 0 @ NumberOfSymbols - .short section_table - optional_header @ SizeOfOptionalHeader - .short IMAGE_FILE_32BIT_MACHINE | \ - IMAGE_FILE_DEBUG_STRIPPED | \ - IMAGE_FILE_EXECUTABLE_IMAGE | \ - IMAGE_FILE_LINE_NUMS_STRIPPED @ Characteristics - -#define __pecoff_code_size (__pecoff_data_start - __efi_start) - -optional_header: - .short PE_OPT_MAGIC_PE32 @ PE32 format - .byte 0x02 @ MajorLinkerVersion - .byte 0x14 @ MinorLinkerVersion - .long __pecoff_code_size @ SizeOfCode - .long __pecoff_data_size @ SizeOfInitializedData - .long 0 @ SizeOfUninitializedData - .long efi_stub_entry - start @ AddressOfEntryPoint - .long start_offset @ BaseOfCode - .long __pecoff_data_start - start @ BaseOfData - -extra_header_fields: - .long 0 @ ImageBase - .long SZ_4K @ SectionAlignment - .long SZ_512 @ FileAlignment - .short 0 @ MajorOsVersion - .short 0 @ MinorOsVersion - .short 0 @ MajorImageVersion - .short 0 @ MinorImageVersion - .short 0 @ MajorSubsystemVersion - .short 0 @ MinorSubsystemVersion - .long 0 @ Win32VersionValue - - .long __pecoff_end - start @ SizeOfImage - .long start_offset @ SizeOfHeaders - .long 0 @ CheckSum - .short IMAGE_SUBSYSTEM_EFI_APPLICATION @ Subsystem - .short 0 @ DllCharacteristics - .long 0 @ SizeOfStackReserve - .long 0 @ SizeOfStackCommit - .long 0 @ SizeOfHeapReserve - .long 0 @ SizeOfHeapCommit - .long 0 @ LoaderFlags - .long (section_table - .) / 8 @ NumberOfRvaAndSizes - - .quad 0 @ ExportTable - .quad 0 @ ImportTable - .quad 0 @ ResourceTable - .quad 0 @ ExceptionTable - .quad 0 @ CertificationTable - .quad 0 @ BaseRelocationTable - -section_table: - .ascii ".text\0\0\0" - .long __pecoff_code_size @ VirtualSize - .long __efi_start @ VirtualAddress - .long __pecoff_code_size @ SizeOfRawData - .long __efi_start @ PointerToRawData - .long 0 @ PointerToRelocations - .long 0 @ PointerToLineNumbers - .short 0 @ NumberOfRelocations - .short 0 @ NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE @ Characteristics - - .ascii ".data\0\0\0" - .long __pecoff_data_size @ VirtualSize - .long __pecoff_data_start - start @ VirtualAddress - .long __pecoff_data_rawsize @ SizeOfRawData - .long __pecoff_data_start - start @ PointerToRawData - .long 0 @ PointerToRelocations - .long 0 @ PointerToLineNumbers - .short 0 @ NumberOfRelocations - .short 0 @ NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_WRITE @ Characteristics - - .set section_count, (. - section_table) / 40 - - .align 12 -__efi_start: -#endif - .endm diff --git a/arch/arm/boot/compressed/head-sa1100.S b/arch/arm/boot/compressed/head-sa1100.S deleted file mode 100644 index 95abdd850fe35bcb2257b4449e0112cd9b478aa5..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-sa1100.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-sa1100.S - * - * Copyright (C) 1999 Nicolas Pitre - * - * SA1100 specific tweaks. This is merged into head.S by the linker. - * - */ - -#include -#include - - .section ".start", "ax" - .arch armv4 - -__SA1100_start: - - @ Preserve r8/r7 i.e. 
kernel entry values -#ifdef CONFIG_SA1100_COLLIE - mov r7, #MACH_TYPE_COLLIE -#endif -#ifdef CONFIG_SA1100_SIMPAD - @ UNTIL we've something like an open bootldr - mov r7, #MACH_TYPE_SIMPAD @should be 87 -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control reg - ands r0, r0, #0x0d - beq 99f - - @ Data cache might be active. - @ Be sure to flush kernel binary out of the cache, - @ whatever state it is, before it is turned off. - @ This is done by fetching through currently executed - @ memory to be sure we hit the same cache. - bic r2, pc, #0x1f - add r3, r2, #0x4000 @ 16 kb is quite enough... -1: ldr r0, [r2], #32 - teq r2, r3 - bne 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c7, c7, 0 @ flush I & D caches - - @ disabling MMU and caches - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #0x0d @ clear WB, DC, MMU - bic r0, r0, #0x1000 @ clear Icache - mcr p15, 0, r0, c1, c0, 0 -99: diff --git a/arch/arm/boot/compressed/head-sharpsl.S b/arch/arm/boot/compressed/head-sharpsl.S deleted file mode 100644 index 992e784500fa6ea4e8ef44843d8a410ba2376c3e..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-sharpsl.S +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-sharpsl.S - * - * Copyright (C) 2004-2005 Richard Purdie - * - * Sharp's bootloader doesn't pass any kind of machine ID - * so we have to figure out the machine for ourselves... - * - * Support for Poodle, Corgi (SL-C700), Shepherd (SL-C750) - * Husky (SL-C760), Tosa (SL-C6000), Spitz (SL-C3000), - * Akita (SL-C1000) and Borzoi (SL-C3100). - * - */ - -#include -#include - -#ifndef CONFIG_PXA_SHARPSL -#error What am I doing here... -#endif - - .section ".start", "ax" - -__SharpSL_start: - -/* Check for TC6393 - if found we have a Tosa */ - ldr r7, .TOSAID - mov r1, #0x10000000 @ Base address of TC6393 chip - mov r6, #0x03 - ldrh r3, [r1, #8] @ Load TC6393XB Revison: This is 0x0003 - cmp r6, r3 - beq .SHARPEND @ Success -> tosa - -/* Check for pxa270 - if found, branch */ - mrc p15, 0, r4, c0, c0 @ Get Processor ID - and r4, r4, #0xffffff00 - ldr r3, .PXA270ID - cmp r4, r3 - beq .PXA270 - -/* Check for w100 - if not found we have a Poodle */ - ldr r1, .W100ADDR @ Base address of w100 chip + regs offset - - mov r6, #0x31 @ Load Magic Init value - str r6, [r1, #0x280] @ to SCRATCH_UMSK - mov r5, #0x3000 -.W100LOOP: - subs r5, r5, #1 - bne .W100LOOP - mov r6, #0x30 @ Load 2nd Magic Init value - str r6, [r1, #0x280] @ to SCRATCH_UMSK - - ldr r6, [r1, #0] @ Load Chip ID - ldr r3, .W100ID - ldr r7, .POODLEID - cmp r6, r3 - bne .SHARPEND @ We have no w100 - Poodle - -/* Check for pxa250 - if found we have a Corgi */ - ldr r7, .CORGIID - ldr r3, .PXA255ID - cmp r4, r3 - blo .SHARPEND @ We have a PXA250 - Corgi - -/* Check for 64MiB flash - if found we have a Shepherd */ - bl get_flash_ids - ldr r7, .SHEPHERDID - cmp r3, #0x76 @ 64MiB flash - beq .SHARPEND @ We have Shepherd - -/* Must be a Husky */ - ldr r7, .HUSKYID @ Must be Husky - b .SHARPEND - -.PXA270: -/* Check for 16MiB flash - if found we have Spitz */ - bl get_flash_ids - ldr r7, .SPITZID - cmp r3, #0x73 @ 16MiB flash - beq .SHARPEND @ We have Spitz - -/* Check for a second SCOOP chip - if found we have Borzoi */ - ldr r1, .SCOOP2ADDR - ldr r7, .BORZOIID - mov r6, #0x0140 - strh r6, [r1] - ldrh r6, [r1] - cmp r6, #0x0140 - beq .SHARPEND @ We have Borzoi - -/* Must be Akita */ - ldr r7, .AKITAID - b .SHARPEND @ We have Borzoi - -.PXA255ID: - .word 0x69052d00 @ PXA255 Processor 
ID -.PXA270ID: - .word 0x69054100 @ PXA270 Processor ID -.W100ID: - .word 0x57411002 @ w100 Chip ID -.W100ADDR: - .word 0x08010000 @ w100 Chip ID Reg Address -.SCOOP2ADDR: - .word 0x08800040 -.POODLEID: - .word MACH_TYPE_POODLE -.CORGIID: - .word MACH_TYPE_CORGI -.SHEPHERDID: - .word MACH_TYPE_SHEPHERD -.HUSKYID: - .word MACH_TYPE_HUSKY -.TOSAID: - .word MACH_TYPE_TOSA -.SPITZID: - .word MACH_TYPE_SPITZ -.AKITAID: - .word MACH_TYPE_AKITA -.BORZOIID: - .word MACH_TYPE_BORZOI - -/* - * Return: r2 - NAND Manufacturer ID - * r3 - NAND Chip ID - * Corrupts: r1 - */ -get_flash_ids: - mov r1, #0x0c000000 @ Base address of NAND chip - ldrb r3, [r1, #24] @ Load FLASHCTL - bic r3, r3, #0x11 @ SET NCE - orr r3, r3, #0x0a @ SET CLR + FLWP - strb r3, [r1, #24] @ Save to FLASHCTL - mov r2, #0x90 @ Command "readid" - strb r2, [r1, #20] @ Save to FLASHIO - bic r3, r3, #2 @ CLR CLE - orr r3, r3, #4 @ SET ALE - strb r3, [r1, #24] @ Save to FLASHCTL - mov r2, #0 @ Address 0x00 - strb r2, [r1, #20] @ Save to FLASHIO - bic r3, r3, #4 @ CLR ALE - strb r3, [r1, #24] @ Save to FLASHCTL -.fids1: - ldrb r3, [r1, #24] @ Load FLASHCTL - tst r3, #32 @ Is chip ready? - beq .fids1 - ldrb r2, [r1, #20] @ NAND Manufacturer ID - ldrb r3, [r1, #20] @ NAND Chip ID - mov pc, lr - -.SHARPEND: diff --git a/arch/arm/boot/compressed/head-xscale.S b/arch/arm/boot/compressed/head-xscale.S deleted file mode 100644 index 20fa44d59f82db667c6572b78905afd3296536ce..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head-xscale.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/arm/boot/compressed/head-xscale.S - * - * XScale specific tweaks. This is merged into head.S by the linker. - * - */ - -#include - - .section ".start", "ax" - -__XScale_start: - - @ Preserve r8/r7 i.e. kernel entry values - - @ Data cache might be active. - @ Be sure to flush kernel binary out of the cache, - @ whatever state it is, before it is turned off. - @ This is done by fetching through currently executed - @ memory to be sure we hit the same cache. - bic r2, pc, #0x1f - add r3, r2, #0x10000 @ 64 kb is quite enough... -1: ldr r0, [r2], #32 - teq r2, r3 - bne 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c7, c7, 0 @ flush I & D caches - - @ disabling MMU and caches - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #0x05 @ clear DC, MMU - bic r0, r0, #0x1000 @ clear Icache - mcr p15, 0, r0, c1, c0, 0 - diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S deleted file mode 100644 index cbe126297f5499507f9b51910d2ab4e5cc7a528c..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/head.S +++ /dev/null @@ -1,1490 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/boot/compressed/head.S - * - * Copyright (C) 1996-2002 Russell King - * Copyright (C) 2004 Hyok S. Choi (MPU support) - */ -#include -#include -#include - -#include "efi-header.S" - - AR_CLASS( .arch armv7-a ) - M_CLASS( .arch armv7-m ) - -/* - * Debugging stuff - * - * Note that these macros must not contain any code which is not - * 100% relocatable. Any attempt to do so will result in a crash. - * Please select one of the following when turning on debugging. 
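
The get_flash_ids routine a little further up is the densest part of the SharpSL probe, so a C rendition may help: it drives the NAND controller's command and address latch lines by hand to issue READID (0x90), then reads the two ID bytes. The register offsets and bit meanings below are restated from the comments in that routine as assumptions, not verified hardware facts.

#include <stdint.h>

#define FLASHIO   20          /* data port, per the comments above    */
#define FLASHCTL  24          /* control port                         */
#define CTL_NCE   0x11        /* chip-enable bits, cleared to select  */
#define CTL_CLE   0x02        /* command latch enable                 */
#define CTL_FLWP  0x08        /* write protect                        */
#define CTL_ALE   0x04        /* address latch enable                 */
#define CTL_RDY   0x20        /* ready/busy flag                      */

static void nand_read_ids(volatile uint8_t *nand, uint8_t *maker, uint8_t *chip)
{
    uint8_t ctl = nand[FLASHCTL];

    ctl = (ctl & ~CTL_NCE) | CTL_CLE | CTL_FLWP;
    nand[FLASHCTL] = ctl;             /* select chip, command phase */
    nand[FLASHIO]  = 0x90;            /* READID command */

    ctl = (ctl & ~CTL_CLE) | CTL_ALE;
    nand[FLASHCTL] = ctl;             /* address phase */
    nand[FLASHIO]  = 0x00;            /* address byte 0x00 */

    nand[FLASHCTL] = ctl & ~CTL_ALE;  /* back to data phase */
    while (!(nand[FLASHCTL] & CTL_RDY))
        ;                             /* spin until the chip is ready */

    *maker = nand[FLASHIO];           /* manufacturer ID */
    *chip  = nand[FLASHIO];           /* device ID */
}
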
- */ -#ifdef DEBUG - -#if defined(CONFIG_DEBUG_ICEDCC) - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c0, c5, 0 - .endm -#elif defined(CONFIG_CPU_XSCALE) - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c8, c0, 0 - .endm -#else - .macro loadsp, rb, tmp1, tmp2 - .endm - .macro writeb, ch, rb - mcr p14, 0, \ch, c1, c0, 0 - .endm -#endif - -#else - -#include CONFIG_DEBUG_LL_INCLUDE - - .macro writeb, ch, rb - senduart \ch, \rb - .endm - -#if defined(CONFIG_ARCH_SA1100) - .macro loadsp, rb, tmp1, tmp2 - mov \rb, #0x80000000 @ physical base address -#ifdef CONFIG_DEBUG_LL_SER3 - add \rb, \rb, #0x00050000 @ Ser3 -#else - add \rb, \rb, #0x00010000 @ Ser1 -#endif - .endm -#else - .macro loadsp, rb, tmp1, tmp2 - addruart \rb, \tmp1, \tmp2 - .endm -#endif -#endif -#endif - - .macro kputc,val - mov r0, \val - bl putc - .endm - - .macro kphex,val,len - mov r0, \val - mov r1, #\len - bl phex - .endm - - .macro debug_reloc_start -#ifdef DEBUG - kputc #'\n' - kphex r6, 8 /* processor id */ - kputc #':' - kphex r7, 8 /* architecture id */ -#ifdef CONFIG_CPU_CP15 - kputc #':' - mrc p15, 0, r0, c1, c0 - kphex r0, 8 /* control reg */ -#endif - kputc #'\n' - kphex r5, 8 /* decompressed kernel start */ - kputc #'-' - kphex r9, 8 /* decompressed kernel end */ - kputc #'>' - kphex r4, 8 /* kernel execution address */ - kputc #'\n' -#endif - .endm - - .macro debug_reloc_end -#ifdef DEBUG - kphex r5, 8 /* end of kernel */ - kputc #'\n' - mov r0, r4 - bl memdump /* dump 256 bytes at start of kernel */ -#endif - .endm - - /* - * Debug kernel copy by printing the memory addresses involved - */ - .macro dbgkc, begin, end, cbegin, cend -#ifdef DEBUG - kputc #'\n' - kputc #'C' - kputc #':' - kputc #'0' - kputc #'x' - kphex \begin, 8 /* Start of compressed kernel */ - kputc #'-' - kputc #'0' - kputc #'x' - kphex \end, 8 /* End of compressed kernel */ - kputc #'-' - kputc #'>' - kputc #'0' - kputc #'x' - kphex \cbegin, 8 /* Start of kernel copy */ - kputc #'-' - kputc #'0' - kputc #'x' - kphex \cend, 8 /* End of kernel copy */ - kputc #'\n' - kputc #'\r' -#endif - .endm - - .section ".start", #alloc, #execinstr -/* - * sort out different calling conventions - */ - .align - /* - * Always enter in ARM state for CPUs that support the ARM ISA. - * As of today (2014) that's exactly the members of the A and R - * classes. - */ - AR_CLASS( .arm ) -start: - .type start,#function - /* - * These 7 nops along with the 1 nop immediately below for - * !THUMB2 form 8 nops that make the compressed kernel bootable - * on legacy ARM systems that were assuming the kernel in a.out - * binary format. The boot loaders on these systems would - * jump 32 bytes into the image to skip the a.out header. - * with these 8 nops filling exactly 32 bytes, things still - * work as expected on these legacy systems. Thumb2 mode keeps - * 7 of the nops as it turns out that some boot loaders - * were patching the initial instructions of the kernel, i.e - * had started to exploit this "patch area". 
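
The kputc/kphex macros defined above funnel into the phex/putc routines near the end of this file. Their effect is easy to state in C: stream the nibbles most-significant first, producing the same fixed-width uppercase output. A sketch, with putchar standing in for the UART write:

#include <stdint.h>
#include <stdio.h>

static void phex(uint32_t val, int digits)
{
    for (int shift = (digits - 1) * 4; shift >= 0; shift -= 4) {
        unsigned nib = (val >> shift) & 0xf;
        putchar(nib < 10 ? '0' + nib : 'A' + nib - 10);
    }
}

/* e.g. the debug_reloc_start trace: phex(processor_id, 8); putchar(':'); */
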
- */ - .rept 7 - __nop - .endr -#ifndef CONFIG_THUMB2_KERNEL - __nop -#else - AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode - M_CLASS( nop.w ) @ M: already in Thumb2 mode - .thumb -#endif - W(b) 1f - - .word _magic_sig @ Magic numbers to help the loader - .word _magic_start @ absolute load/run zImage address - .word _magic_end @ zImage end address - .word 0x04030201 @ endianness flag - .word 0x45454545 @ another magic number to indicate - .word _magic_table @ additional data table - - __EFI_HEADER -1: - ARM_BE8( setend be ) @ go BE8 if compiled for BE8 - AR_CLASS( mrs r9, cpsr ) -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install @ get into SVC mode, reversibly -#endif - mov r7, r1 @ save architecture ID - mov r8, r2 @ save atags pointer - -#ifndef CONFIG_CPU_V7M - /* - * Booting from Angel - need to enter SVC mode and disable - * FIQs/IRQs (numeric definitions from angel arm.h source). - * We only do this if we were in user mode on entry. - */ - mrs r2, cpsr @ get current mode - tst r2, #3 @ not user? - bne not_angel - mov r0, #0x17 @ angel_SWIreason_EnterSVC - ARM( swi 0x123456 ) @ angel_SWI_ARM - THUMB( svc 0xab ) @ angel_SWI_THUMB -not_angel: - safe_svcmode_maskall r0 - msr spsr_cxsf, r9 @ Save the CPU boot mode in - @ SPSR -#endif - /* - * Note that some cache flushing and other stuff may - * be needed here - is there an Angel SWI call for this? - */ - - /* - * some architecture specific code can be inserted - * by the linker here, but it should preserve r7, r8, and r9. - */ - - .text - -#ifdef CONFIG_AUTO_ZRELADDR - /* - * Find the start of physical memory. As we are executing - * without the MMU on, we are in the physical address space. - * We just need to get rid of any offset by aligning the - * address. - * - * This alignment is a balance between the requirements of - * different platforms - we have chosen 128MB to allow - * platforms which align the start of their physical memory - * to 128MB to use this feature, while allowing the zImage - * to be placed within the first 128MB of memory on other - * platforms. Increasing the alignment means we place - * stricter alignment requirements on the start of physical - * memory, but relaxing it means that we break people who - * are already placing their zImage in (eg) the top 64MB - * of this range. - */ - mov r4, pc - and r4, r4, #0xf8000000 - /* Determine final kernel image address. */ - add r4, r4, #TEXT_OFFSET -#else - ldr r4, =zreladdr -#endif - - /* - * Set up a page table only if it won't overwrite ourself. - * That means r4 < pc || r4 - 16k page directory > &_end. - * Given that r4 > &_end is most unfrequent, we add a rough - * additional 1MB of room for a possible appended DTB. - */ - mov r0, pc - cmp r0, r4 - ldrcc r0, LC0+32 - addcc r0, r0, pc - cmpcc r4, r0 - orrcc r4, r4, #1 @ remember we skipped cache_on - blcs cache_on - -restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r6, r10, r11, r12} - ldr sp, [r0, #28] - - /* - * We might be running at a different address. We need - * to fix up various pointers. - */ - sub r0, r0, r1 @ calculate the delta offset - add r6, r6, r0 @ _edata - add r10, r10, r0 @ inflated kernel size location - - /* - * The kernel build system appends the size of the - * decompressed kernel at the end of the compressed data - * in little-endian form. 
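
The four ldrb/orr instructions that follow this comment reassemble that little-endian length one byte at a time, which keeps the code correct on big-endian (BE8) builds and safe for unaligned placement. The same operation in C:

#include <stdint.h>

static uint32_t read_le32(const uint8_t *p)
{
    /* byte loads are endian-neutral and carry no alignment requirement */
    return (uint32_t)p[0]
         | (uint32_t)p[1] << 8
         | (uint32_t)p[2] << 16
         | (uint32_t)p[3] << 24;
}
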
- */ - ldrb r9, [r10, #0] - ldrb lr, [r10, #1] - orr r9, r9, lr, lsl #8 - ldrb lr, [r10, #2] - ldrb r10, [r10, #3] - orr r9, r9, lr, lsl #16 - orr r9, r9, r10, lsl #24 - -#ifndef CONFIG_ZBOOT_ROM - /* malloc space is above the relocated stack (64k max) */ - add sp, sp, r0 - add r10, sp, #0x10000 -#else - /* - * With ZBOOT_ROM the bss/stack is non relocatable, - * but someone could still run this code from RAM, - * in which case our reference is _edata. - */ - mov r10, r6 -#endif - - mov r5, #0 @ init dtb size to 0 -#ifdef CONFIG_ARM_APPENDED_DTB -/* - * r0 = delta - * r2 = BSS start - * r3 = BSS end - * r4 = final kernel address (possibly with LSB set) - * r5 = appended dtb size (still unknown) - * r6 = _edata - * r7 = architecture ID - * r8 = atags/device tree pointer - * r9 = size of decompressed image - * r10 = end of this image, including bss/stack/malloc space if non XIP - * r11 = GOT start - * r12 = GOT end - * sp = stack pointer - * - * if there are device trees (dtb) appended to zImage, advance r10 so that the - * dtb data will get relocated along with the kernel if necessary. - */ - - ldr lr, [r6, #0] -#ifndef __ARMEB__ - ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian -#else - ldr r1, =0xd00dfeed -#endif - cmp lr, r1 - bne dtb_check_done @ not found - -#ifdef CONFIG_ARM_ATAG_DTB_COMPAT - /* - * OK... Let's do some funky business here. - * If we do have a DTB appended to zImage, and we do have - * an ATAG list around, we want the later to be translated - * and folded into the former here. No GOT fixup has occurred - * yet, but none of the code we're about to call uses any - * global variable. - */ - - /* Get the initial DTB size */ - ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif - /* 50% DTB growth should be good enough */ - add r5, r5, r5, lsr #1 - /* preserve 64-bit alignment */ - add r5, r5, #7 - bic r5, r5, #7 - /* clamp to 32KB min and 1MB max */ - cmp r5, #(1 << 15) - movlo r5, #(1 << 15) - cmp r5, #(1 << 20) - movhi r5, #(1 << 20) - /* temporarily relocate the stack past the DTB work space */ - add sp, sp, r5 - - stmfd sp!, {r0-r3, ip, lr} - mov r0, r8 - mov r1, r6 - mov r2, r5 - bl atags_to_fdt - - /* - * If returned value is 1, there is no ATAG at the location - * pointed by r8. Try the typical 0x100 offset from start - * of RAM and hope for the best. - */ - cmp r0, #1 - sub r0, r4, #TEXT_OFFSET - bic r0, r0, #1 - add r0, r0, #0x100 - mov r1, r6 - mov r2, r5 - bleq atags_to_fdt - - ldmfd sp!, {r0-r3, ip, lr} - sub sp, sp, r5 -#endif - - mov r8, r6 @ use the appended device tree - - /* - * Make sure that the DTB doesn't end up in the final - * kernel's .bss area. To do so, we adjust the decompressed - * kernel size to compensate if that .bss size is larger - * than the relocated code. - */ - ldr r5, =_kernel_bss_size - adr r1, wont_overwrite - sub r1, r6, r1 - subs r1, r5, r1 - addhi r9, r9, r1 - - /* Get the current DTB size */ - ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert r5 (dtb size) to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif - - /* preserve 64-bit alignment */ - add r5, r5, #7 - bic r5, r5, #7 - - /* relocate some pointers past the appended dtb */ - add r6, r6, r5 - add r10, r10, r5 - add sp, sp, r5 -dtb_check_done: -#endif - -/* - * Check to see if we will overwrite ourselves. 
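
To summarize the DTB sizing dance above in C: the FDT totalsize field is big-endian by specification, so it is byte-swapped on little-endian kernels, padded by 50% to leave room for the ATAG-to-FDT conversion, rounded up for 64-bit alignment, and clamped to the 32 KB to 1 MB window:

#include <stdint.h>

static uint32_t dtb_workspace(uint32_t be_totalsize, int little_endian_host)
{
    uint32_t size = be_totalsize;

    if (little_endian_host)          /* FDT header fields are big-endian */
        size = (size >> 24) | ((size >> 8) & 0x0000ff00) |
               ((size << 8) & 0x00ff0000) | (size << 24);

    size += size >> 1;               /* 50% growth should be good enough */
    size  = (size + 7) & ~7u;        /* preserve 64-bit alignment */

    if (size < (1u << 15)) size = 1u << 15;   /* clamp to 32 KB min */
    if (size > (1u << 20)) size = 1u << 20;   /* ... and 1 MB max   */
    return size;
}
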
- * r4 = final kernel address (possibly with LSB set) - * r9 = size of decompressed image - * r10 = end of this image, including bss/stack/malloc space if non XIP - * We basically want: - * r4 - 16k page directory >= r10 -> OK - * r4 + image length <= address of wont_overwrite -> OK - * Note: the possible LSB in r4 is harmless here. - */ - add r10, r10, #16384 - cmp r4, r10 - bhs wont_overwrite - add r10, r4, r9 - adr r9, wont_overwrite - cmp r10, r9 - bls wont_overwrite - -/* - * Relocate ourselves past the end of the decompressed kernel. - * r6 = _edata - * r10 = end of the decompressed kernel - * Because we always copy ahead, we need to do it from the end and go - * backward in case the source and destination overlap. - */ - /* - * Bump to the next 256-byte boundary with the size of - * the relocation code added. This avoids overwriting - * ourself when the offset is small. - */ - add r10, r10, #((reloc_code_end - restart + 256) & ~255) - bic r10, r10, #255 - - /* Get start of code we want to copy and align it down. */ - adr r5, restart - bic r5, r5, #31 - -/* Relocate the hyp vector base if necessary */ -#ifdef CONFIG_ARM_VIRT_EXT - mrs r0, spsr - and r0, r0, #MODE_MASK - cmp r0, #HYP_MODE - bne 1f - - /* - * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. - * Call __hyp_set_vectors with the new address so that we - * can HVC again after the copy. - */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 - sub r0, r0, r5 - add r0, r0, r10 - bl __hyp_set_vectors -1: -#endif - - sub r9, r6, r5 @ size to copy - add r9, r9, #31 @ rounded up to a multiple - bic r9, r9, #31 @ ... of 32 bytes - add r6, r9, r5 - add r9, r9, r10 - -#ifdef DEBUG - sub r10, r6, r5 - sub r10, r9, r10 - /* - * We are about to copy the kernel to a new memory area. - * The boundaries of the new memory area can be found in - * r10 and r9, whilst r5 and r6 contain the boundaries - * of the memory we are going to copy. - * Calling dbgkc will help with the printing of this - * information. - */ - dbgkc r5, r6, r10, r9 -#endif - -1: ldmdb r6!, {r0 - r3, r10 - r12, lr} - cmp r6, r5 - stmdb r9!, {r0 - r3, r10 - r12, lr} - bhi 1b - - /* Preserve offset to relocated code. */ - sub r6, r9, r6 - -#ifndef CONFIG_ZBOOT_ROM - /* cache_clean_flush may use the stack, so relocate it */ - add sp, sp, r6 -#endif - - bl cache_clean_flush - - badr r0, restart - add r0, r0, r6 - mov pc, r0 - -wont_overwrite: -/* - * If delta is zero, we are running at the address we were linked at. - * r0 = delta - * r2 = BSS start - * r3 = BSS end - * r4 = kernel execution address (possibly with LSB set) - * r5 = appended dtb size (0 if not present) - * r7 = architecture ID - * r8 = atags pointer - * r11 = GOT start - * r12 = GOT end - * sp = stack pointer - */ - orrs r1, r0, r5 - beq not_relocated - - add r11, r11, r0 - add r12, r12, r0 - -#ifndef CONFIG_ZBOOT_ROM - /* - * If we're running fully PIC === CONFIG_ZBOOT_ROM = n, - * we need to fix up pointers into the BSS region. - * Note that the stack pointer has already been fixed up. - */ - add r2, r2, r0 - add r3, r3, r0 - - /* - * Relocate all entries in the GOT table. 
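
Both GOT fixup loops just below perform the same core transform, restated here in C: every entry gets the load-address delta added, and, in the fully-PIC (!ZBOOT_ROM) variant, entries pointing into the already-shifted BSS are additionally bumped past the appended DTB. Addresses are modeled as plain integers for the sketch:

#include <stdint.h>

static void relocate_got(uint32_t *got, uint32_t *got_end,
                         uint32_t delta, uint32_t dtb_size,
                         uint32_t bss_start, uint32_t bss_end)
{
    for (; got < got_end; got++) {
        uint32_t entry = *got + delta;      /* fix up C references */

        if (entry >= bss_start && entry < bss_end)
            entry += dtb_size;              /* BSS moved past the DTB */

        *got = entry;
    }
}
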
- * Bump bss entries to _edata + dtb size - */ -1: ldr r1, [r11, #0] @ relocate entries in the GOT - add r1, r1, r0 @ This fixes up C references - cmp r1, r2 @ if entry >= bss_start && - cmphs r3, r1 @ bss_end > entry - addhi r1, r1, r5 @ entry += dtb size - str r1, [r11], #4 @ next entry - cmp r11, r12 - blo 1b - - /* bump our bss pointers too */ - add r2, r2, r5 - add r3, r3, r5 - -#else - - /* - * Relocate entries in the GOT table. We only relocate - * the entries that are outside the (relocated) BSS region. - */ -1: ldr r1, [r11, #0] @ relocate entries in the GOT - cmp r1, r2 @ entry < bss_start || - cmphs r3, r1 @ _end < entry - addlo r1, r1, r0 @ table. This fixes up the - str r1, [r11], #4 @ C references. - cmp r11, r12 - blo 1b -#endif - -not_relocated: mov r0, #0 -1: str r0, [r2], #4 @ clear bss - str r0, [r2], #4 - str r0, [r2], #4 - str r0, [r2], #4 - cmp r2, r3 - blo 1b - - /* - * Did we skip the cache setup earlier? - * That is indicated by the LSB in r4. - * Do it now if so. - */ - tst r4, #1 - bic r4, r4, #1 - blne cache_on - -/* - * The C runtime environment should now be setup sufficiently. - * Set up some pointers, and start decompressing. - * r4 = kernel execution address - * r7 = architecture ID - * r8 = atags pointer - */ - mov r0, r4 - mov r1, sp @ malloc space above stack - add r2, sp, #0x10000 @ 64k max - mov r3, r7 - bl decompress_kernel - bl cache_clean_flush - bl cache_off - -#ifdef CONFIG_ARM_VIRT_EXT - mrs r0, spsr @ Get saved CPU boot mode - and r0, r0, #MODE_MASK - cmp r0, #HYP_MODE @ if not booted in HYP mode... - bne __enter_kernel @ boot kernel directly - - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - - bl __hyp_set_vectors - __HVC(0) @ otherwise bounce to hyp mode - - b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . -#else - b __enter_kernel -#endif - - .align 2 - .type LC0, #object -LC0: .word LC0 @ r1 - .word __bss_start @ r2 - .word _end @ r3 - .word _edata @ r6 - .word input_data_end - 4 @ r10 (inflated size location) - .word _got_start @ r11 - .word _got_end @ ip - .word .L_user_stack_end @ sp - .word _end - restart + 16384 + 1024*1024 - .size LC0, . - LC0 - -#ifdef CONFIG_ARCH_RPC - .globl params -params: ldr r0, =0x10000100 @ params_phys for RPC - mov pc, lr - .ltorg - .align -#endif - -/* - * Turn on the cache. We need to setup some page tables so that we - * can have both the I and D caches on. - * - * We place the page tables 16k down from the kernel execution address, - * and we hope that nothing else is using it. If we're using it, we - * will go pop! - * - * On entry, - * r4 = kernel execution address - * r7 = architecture number - * r8 = atags pointer - * On exit, - * r0, r1, r2, r3, r9, r10, r12 corrupted - * This routine must preserve: - * r4, r7, r8 - */ - .align 5 -cache_on: mov r3, #8 @ cache_on function - b call_cache_fn - -/* - * Initialize the highest priority protection region, PR7 - * to cover all 32bit address and cacheable and bufferable. 
- */ -__armv4_mpu_cache_on: - mov r0, #0x3f @ 4G, the whole - mcr p15, 0, r0, c6, c7, 0 @ PR7 Area Setting - mcr p15, 0, r0, c6, c7, 1 - - mov r0, #0x80 @ PR7 - mcr p15, 0, r0, c2, c0, 0 @ D-cache on - mcr p15, 0, r0, c2, c0, 1 @ I-cache on - mcr p15, 0, r0, c3, c0, 0 @ write-buffer on - - mov r0, #0xc000 - mcr p15, 0, r0, c5, c0, 1 @ I-access permission - mcr p15, 0, r0, c5, c0, 0 @ D-access permission - - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c5, 0 @ flush(inval) I-Cache - mcr p15, 0, r0, c7, c6, 0 @ flush(inval) D-Cache - mrc p15, 0, r0, c1, c0, 0 @ read control reg - @ ...I .... ..D. WC.M - orr r0, r0, #0x002d @ .... .... ..1. 11.1 - orr r0, r0, #0x1000 @ ...1 .... .... .... - - mcr p15, 0, r0, c1, c0, 0 @ write control reg - - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ flush(inval) I-Cache - mcr p15, 0, r0, c7, c6, 0 @ flush(inval) D-Cache - mov pc, lr - -__armv3_mpu_cache_on: - mov r0, #0x3f @ 4G, the whole - mcr p15, 0, r0, c6, c7, 0 @ PR7 Area Setting - - mov r0, #0x80 @ PR7 - mcr p15, 0, r0, c2, c0, 0 @ cache on - mcr p15, 0, r0, c3, c0, 0 @ write-buffer on - - mov r0, #0xc000 - mcr p15, 0, r0, c5, c0, 0 @ access permission - - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - /* - * ?? ARMv3 MMU does not allow reading the control register, - * does this really work on ARMv3 MPU? - */ - mrc p15, 0, r0, c1, c0, 0 @ read control reg - @ .... .... .... WC.M - orr r0, r0, #0x000d @ .... .... .... 11.1 - /* ?? this overwrites the value constructed above? */ - mov r0, #0 - mcr p15, 0, r0, c1, c0, 0 @ write control reg - - /* ?? invalidate for the second time? */ - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - mov pc, lr - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH -#define CB_BITS 0x08 -#else -#define CB_BITS 0x0c -#endif - -__setup_mmu: sub r3, r4, #16384 @ Page directory size - bic r3, r3, #0xff @ Align the pointer - bic r3, r3, #0x3f00 -/* - * Initialise the page tables, turning on the cacheable and bufferable - * bits for the RAM area only. - */ - mov r0, r3 - mov r9, r0, lsr #18 - mov r9, r9, lsl #18 @ start of RAM - add r10, r9, #0x10000000 @ a reasonable RAM size - mov r1, #0x12 @ XN|U + section mapping - orr r1, r1, #3 << 10 @ AP=11 - add r2, r3, #16384 -1: cmp r1, r9 @ if virt > start of RAM - cmphs r10, r1 @ && end of RAM > virt - bic r1, r1, #0x1c @ clear XN|U + C + B - orrlo r1, r1, #0x10 @ Set XN|U for non-RAM - orrhs r1, r1, r6 @ set RAM section settings - str r1, [r0], #4 @ 1:1 mapping - add r1, r1, #1048576 - teq r0, r2 - bne 1b -/* - * If ever we are running from Flash, then we surely want the cache - * to be enabled also for our execution instance... We map 2MB of it - * so there is no map overlap problem for up to 1 MB compressed kernel. - * If the execution is in RAM then we would only be duplicating the above. 
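
A C sketch of the first-level table that __setup_mmu builds above (the extra 2 MB flash mapping continues just below): 4096 one-megabyte section descriptors mapping the address space 1:1, with the cacheable/bufferable bits set only inside an assumed 256 MB RAM window. The descriptor bits follow the constants above, slightly simplified; the real code folds the XN|U handling into the same running value.

#include <stdint.h>

#define SECT_SECTION 0x02u        /* 1 MB section descriptor           */
#define SECT_AP11    (3u << 10)   /* AP = 11: full read/write access   */
#define SECT_XN_U    0x10u        /* XN|U bits for non-RAM mappings    */

/* pgd must point at a 16 KB-aligned array of 4096 entries */
static void setup_identity_map(uint32_t *pgd, uint32_t ram_start,
                               uint32_t cb_bits /* e.g. 0x0c: C + B */)
{
    uint32_t ram_end = ram_start + 0x10000000;   /* "a reasonable RAM size" */

    for (uint32_t i = 0; i < 4096; i++) {
        uint32_t vaddr = i << 20;                /* one 1 MB section each */
        uint32_t desc  = vaddr | SECT_SECTION | SECT_AP11;

        if (vaddr >= ram_start && vaddr < ram_end)
            desc |= cb_bits;                     /* cache only the RAM window */
        else
            desc |= SECT_XN_U;

        pgd[i] = desc;                           /* 1:1 mapping */
    }
}
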
- */ - orr r1, r6, #0x04 @ ensure B is set for this - orr r1, r1, #3 << 10 - mov r2, pc - mov r2, r2, lsr #20 - orr r1, r1, r2, lsl #20 - add r0, r3, r2, lsl #2 - str r1, [r0], #4 - add r1, r1, #1048576 - str r1, [r0] - mov pc, lr -ENDPROC(__setup_mmu) - -@ Enable unaligned access on v6, to allow better code generation -@ for the decompressor C code: -__armv6_mmu_cache_on: - mrc p15, 0, r0, c1, c0, 0 @ read SCTLR - bic r0, r0, #2 @ A (no unaligned access fault) - orr r0, r0, #1 << 22 @ U (v6 unaligned access model) - mcr p15, 0, r0, c1, c0, 0 @ write SCTLR - b __armv4_mmu_cache_on - -__arm926ejs_mmu_cache_on: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ put dcache in WT mode - mcr p15, 7, r0, c15, c0, 0 -#endif - -__armv4_mmu_cache_on: - mov r12, lr -#ifdef CONFIG_MMU - mov r6, #CB_BITS | 0x12 @ U - bl __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement - orr r0, r0, #0x0030 - ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables - bl __common_mmu_cache_on - mov r0, #0 - mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs -#endif - mov pc, r12 - -__armv7_mmu_cache_on: - mov r12, lr -#ifdef CONFIG_MMU - mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0 - tst r11, #0xf @ VMSA - movne r6, #CB_BITS | 0x02 @ !XN - blne __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - tst r11, #0xf @ VMSA - mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control reg - bic r0, r0, #1 << 28 @ clear SCTLR.TRE - orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement - orr r0, r0, #0x003c @ write buffer - bic r0, r0, #2 @ A (no unaligned access fault) - orr r0, r0, #1 << 22 @ U (v6 unaligned access model) - @ (needed for ARM1176) -#ifdef CONFIG_MMU - ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables - mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg - orrne r0, r0, #1 @ MMU enabled - movne r1, #0xfffffffd @ domain 0 = client - bic r6, r6, #1 << 31 @ 32-bit translation system - bic r6, r6, #(7 << 0) | (1 << 4) @ use only ttbr0 - mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer - mcrne p15, 0, r1, c3, c0, 0 @ load domain access control - mcrne p15, 0, r6, c2, c0, 2 @ load ttb control -#endif - mcr p15, 0, r0, c7, c5, 4 @ ISB - mcr p15, 0, r0, c1, c0, 0 @ load control register - mrc p15, 0, r0, c1, c0, 0 @ and read it back - mov r0, #0 - mcr p15, 0, r0, c7, c5, 4 @ ISB - mov pc, r12 - -__fa526_cache_on: - mov r12, lr - mov r6, #CB_BITS | 0x12 @ U - bl __setup_mmu - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ Invalidate whole cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7, 0 @ flush UTLB - mrc p15, 0, r0, c1, c0, 0 @ read control reg - orr r0, r0, #0x1000 @ I-cache enable - bl __common_mmu_cache_on - mov r0, #0 - mcr p15, 0, r0, c8, c7, 0 @ flush UTLB - mov pc, r12 - -__common_mmu_cache_on: -#ifndef CONFIG_THUMB2_KERNEL -#ifndef DEBUG - orr r0, r0, #0x000d @ Write buffer, mmu -#endif - mov r1, #-1 - mcr p15, 0, r3, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c3, c0, 0 @ load domain access control - b 1f - .align 5 @ cache line aligned -1: mcr p15, 0, r0, c1, c0, 0 @ load control register - mrc p15, 0, r0, c1, c0, 0 @ and read it back to - sub pc, lr, r0, lsr #32 @ properly flush pipeline -#endif - -#define PROC_ENTRY_SIZE (4*5) - -/* - * Here follow the relocatable cache support functions for the - * various processors. 
This is a generic hook for locating an
- * entry and jumping to an instruction at the specified offset
- * from the start of the block. Please note this is all position
- * independent code.
- *
- * r1 = corrupted
- * r2 = corrupted
- * r3 = block offset
- * r9 = corrupted
- * r12 = corrupted
- */
-
-call_cache_fn: adr r12, proc_types
-#ifdef CONFIG_CPU_CP15
- mrc p15, 0, r9, c0, c0 @ get processor ID
-#elif defined(CONFIG_CPU_V7M)
- /*
- * On v7-M the processor id is located in the V7M_SCB_CPUID
- * register, but as cache handling is IMPLEMENTATION DEFINED on
- * v7-M (if existent at all) we just return early here.
- * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
- * __armv7_mmu_cache_{on,off,flush}) would be selected which
- * use cp15 registers that are not implemented on v7-M.
- */
- bx lr
-#else
- ldr r9, =CONFIG_PROCESSOR_ID
-#endif
-1: ldr r1, [r12, #0] @ get value
- ldr r2, [r12, #4] @ get mask
- eor r1, r1, r9 @ (real ^ match)
- tst r1, r2 @ & mask
- ARM( addeq pc, r12, r3 ) @ call cache function
- THUMB( addeq r12, r3 )
- THUMB( moveq pc, r12 ) @ call cache function
- add r12, r12, #PROC_ENTRY_SIZE
- b 1b
-
-/*
- * Table for cache operations. This is basically:
- * - CPU ID match
- * - CPU ID mask
- * - 'cache on' method instruction
- * - 'cache off' method instruction
- * - 'cache flush' method instruction
- *
- * We match an entry using: ((real_id ^ match) & mask) == 0
- *
- * Writethrough caches generally only need 'on' and 'off'
- * methods. Writeback caches _must_ have the flush method
- * defined.
- */
- .align 2
- .type proc_types,#object
-proc_types:
- .word 0x41000000 @ old ARM ID
- .word 0xff00f000
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- .word 0x41007000 @ ARM7/710
- .word 0xfff8fe00
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- .word 0x41807200 @ ARM720T (writethrough)
- .word 0xffffff00
- W(b) __armv4_mmu_cache_on
- W(b) __armv4_mmu_cache_off
- mov pc, lr
- THUMB( nop )
-
- .word 0x41007400 @ ARM74x
- .word 0xff00ff00
- W(b) __armv3_mpu_cache_on
- W(b) __armv3_mpu_cache_off
- W(b) __armv3_mpu_cache_flush
-
- .word 0x41009400 @ ARM94x
- .word 0xff00ff00
- W(b) __armv4_mpu_cache_on
- W(b) __armv4_mpu_cache_off
- W(b) __armv4_mpu_cache_flush
-
- .word 0x41069260 @ ARM926EJ-S (v5TEJ)
- .word 0xff0ffff0
- W(b) __arm926ejs_mmu_cache_on
- W(b) __armv4_mmu_cache_off
- W(b) __armv5tej_mmu_cache_flush
-
- .word 0x00007000 @ ARM7 IDs
- .word 0x0000f000
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
- mov pc, lr
- THUMB( nop )
-
- @ Everything from here on will be the new ID system.
- - .word 0x4401a100 @ sa110 / sa1100 - .word 0xffffffe0 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x6901b110 @ sa1110 - .word 0xfffffff0 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x56056900 - .word 0xffffff00 @ PXA9xx - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x56158000 @ PXA168 - .word 0xfffff000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - - .word 0x56050000 @ Feroceon - .word 0xff0f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - -#ifdef CONFIG_CPU_FEROCEON_OLD_ID - /* this conflicts with the standard ARMv5TE entry */ - .long 0x41009260 @ Old Feroceon - .long 0xff00fff0 - b __armv4_mmu_cache_on - b __armv4_mmu_cache_off - b __armv5tej_mmu_cache_flush -#endif - - .word 0x66015261 @ FA526 - .word 0xff01fff1 - W(b) __fa526_cache_on - W(b) __armv4_mmu_cache_off - W(b) __fa526_cache_flush - - @ These match on the architecture ID - - .word 0x00020000 @ ARMv4T - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x00050000 @ ARMv5TE - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv4_mmu_cache_flush - - .word 0x00060000 @ ARMv5TEJ - .word 0x000f0000 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv5tej_mmu_cache_flush - - .word 0x0007b000 @ ARMv6 - .word 0x000ff000 - W(b) __armv6_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv6_mmu_cache_flush - - .word 0x000f0000 @ new CPU Id - .word 0x000f0000 - W(b) __armv7_mmu_cache_on - W(b) __armv7_mmu_cache_off - W(b) __armv7_mmu_cache_flush - - .word 0 @ unrecognised type - .word 0 - mov pc, lr - THUMB( nop ) - mov pc, lr - THUMB( nop ) - mov pc, lr - THUMB( nop ) - - .size proc_types, . - proc_types - - /* - * If you get a "non-constant expression in ".if" statement" - * error from the assembler on this line, check that you have - * not accidentally written a "b" instruction where you should - * have written W(b). - */ - .if (. - proc_types) % PROC_ENTRY_SIZE != 0 - .error "The size of one or more proc_types entries is wrong." - .endif - -/* - * Turn off the Cache and MMU. ARMv3 does not support - * reading the control register, but ARMv4 does. 
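
call_cache_fn and the proc_types table above amount to a small match/mask dispatch: compare the CPU ID against each entry under a mask, then jump through the selected slot. In C terms, with function pointers standing in for the fixed-offset branch slots (the zero/zero sentinel matches everything, like the final table entry above):

#include <stdint.h>

struct proc_type {
    uint32_t match;               /* expected CPU ID bits          */
    uint32_t mask;                /* which ID bits are significant */
    void (*cache_on)(void);
    void (*cache_off)(void);
    void (*cache_flush)(void);
};

static const struct proc_type *find_proc(const struct proc_type *table,
                                         uint32_t cpu_id)
{
    /* an entry matches when ((real_id ^ match) & mask) == 0; the
     * terminating {0, 0, ...} entry therefore matches any ID */
    while (((cpu_id ^ table->match) & table->mask) != 0)
        table++;
    return table;
}
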
- * - * On exit, - * r0, r1, r2, r3, r9, r12 corrupted - * This routine must preserve: - * r4, r7, r8 - */ - .align 5 -cache_off: mov r3, #12 @ cache_off function - b call_cache_fn - -__armv4_mpu_cache_off: - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0 @ turn MPU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c6, 0 @ flush D-Cache - mcr p15, 0, r0, c7, c5, 0 @ flush I-Cache - mov pc, lr - -__armv3_mpu_cache_off: - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0, 0 @ turn MPU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3 - mov pc, lr - -__armv4_mmu_cache_off: -#ifdef CONFIG_MMU - mrc p15, 0, r0, c1, c0 - bic r0, r0, #0x000d - mcr p15, 0, r0, c1, c0 @ turn MMU and cache off - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4 - mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4 -#endif - mov pc, lr - -__armv7_mmu_cache_off: - mrc p15, 0, r0, c1, c0 -#ifdef CONFIG_MMU - bic r0, r0, #0x0005 -#else - bic r0, r0, #0x0004 -#endif - mcr p15, 0, r0, c1, c0 @ turn MMU and cache off - mov r12, lr - bl __armv7_mmu_cache_flush - mov r0, #0 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ invalidate whole TLB -#endif - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTC - mcr p15, 0, r0, c7, c10, 4 @ DSB - mcr p15, 0, r0, c7, c5, 4 @ ISB - mov pc, r12 - -/* - * Clean and flush the cache to maintain consistency. - * - * On exit, - * r1, r2, r3, r9, r10, r11, r12 corrupted - * This routine must preserve: - * r4, r6, r7, r8 - */ - .align 5 -cache_clean_flush: - mov r3, #16 - b call_cache_fn - -__armv4_mpu_cache_flush: - tst r4, #1 - movne pc, lr - mov r2, #1 - mov r3, #0 - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - mov r1, #7 << 5 @ 8 segments -1: orr r3, r1, #63 << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - - teq r2, #0 - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr - -__fa526_cache_flush: - tst r4, #1 - movne pc, lr - mov r1, #0 - mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache - mcr p15, 0, r1, c7, c5, 0 @ flush I cache - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mov pc, lr - -__armv6_mmu_cache_flush: - mov r1, #0 - tst r4, #1 - mcreq p15, 0, r1, c7, c14, 0 @ clean+invalidate D - mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB - mcreq p15, 0, r1, c7, c15, 0 @ clean+invalidate unified - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mov pc, lr - -__armv7_mmu_cache_flush: - tst r4, #1 - bne iflush - mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1 - tst r10, #0xf << 16 @ hierarchical cache (ARMv7) - mov r10, #0 - beq hierarchical - mcr p15, 0, r10, c7, c14, 0 @ clean+invalidate D - b iflush -hierarchical: - mcr p15, 0, r10, c7, c10, 5 @ DMB - stmfd sp!, {r0-r7, r9-r11} - mrc p15, 1, r0, c0, c0, 1 @ read clidr - ands r3, r0, #0x7000000 @ extract loc from clidr - mov r3, r3, lsr #23 @ left align loc bit field - beq finished @ if loc is 0, then no need to clean - mov r10, #0 @ start clean at cache level 0 -loop1: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - mcr p15, 0, r10, c7, c5, 4 @ isb 
to sync the new cssr&csidr
- mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
- and r2, r1, #7 @ extract the length of the cache lines
- add r2, r2, #4 @ add 4 (line length offset)
- ldr r4, =0x3ff
- ands r4, r4, r1, lsr #3 @ find maximum number of the way size
- clz r5, r4 @ find bit position of way size increment
- ldr r7, =0x7fff
- ands r7, r7, r1, lsr #13 @ extract max number of the index size
-loop2:
- mov r9, r4 @ create working copy of max way size
-loop3:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r9, r5 )
- THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- THUMB( lsl r6, r7, r2 )
- THUMB( orr r11, r11, r6 ) @ factor index number into r11
- mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
- bge loop3
- subs r7, r7, #1 @ decrement the index
- bge loop2
-skip:
- add r10, r10, #2 @ increment cache number
- cmp r3, r10
- bgt loop1
-finished:
- ldmfd sp!, {r0-r7, r9-r11}
- mov r10, #0 @ switch back to cache level 0
- mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
-iflush:
- mcr p15, 0, r10, c7, c10, 4 @ DSB
- mcr p15, 0, r10, c7, c5, 0 @ invalidate I+BTB
- mcr p15, 0, r10, c7, c10, 4 @ DSB
- mcr p15, 0, r10, c7, c5, 4 @ ISB
- mov pc, lr
-
-__armv5tej_mmu_cache_flush:
- tst r4, #1
- movne pc, lr
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache
- bne 1b
- mcr p15, 0, r0, c7, c5, 0 @ flush I cache
- mcr p15, 0, r0, c7, c10, 4 @ drain WB
- mov pc, lr
-
-__armv4_mmu_cache_flush:
- tst r4, #1
- movne pc, lr
- mov r2, #64*1024 @ default: 32K dcache size (*2)
- mov r11, #32 @ default: 32 byte line size
- mrc p15, 0, r3, c0, c0, 1 @ read cache type
- teq r3, r9 @ cache ID register present?
- beq no_cache_id
- mov r1, r3, lsr #18
- and r1, r1, #7
- mov r2, #1024
- mov r2, r2, lsl r1 @ base dcache size *2
- tst r3, #1 << 14 @ test M bit
- addne r2, r2, r2, lsr #1 @ +1/2 size if M == 1
- mov r3, r3, lsr #12
- and r3, r3, #3
- mov r11, #8
- mov r11, r11, lsl r3 @ cache line size in bytes
-no_cache_id:
- mov r1, pc
- bic r1, r1, #63 @ align to longest cache line
- add r2, r1, r2
-1:
- ARM( ldr r3, [r1], r11 ) @ s/w flush D cache
- THUMB( ldr r3, [r1] ) @ s/w flush D cache
- THUMB( add r1, r1, r11 )
- teq r1, r2
- bne 1b
-
- mcr p15, 0, r1, c7, c5, 0 @ flush I cache
- mcr p15, 0, r1, c7, c6, 0 @ flush D cache
- mcr p15, 0, r1, c7, c10, 4 @ drain WB
- mov pc, lr
-
-__armv3_mmu_cache_flush:
-__armv3_mpu_cache_flush:
- tst r4, #1
- movne pc, lr
- mov r1, #0
- mcr p15, 0, r1, c7, c0, 0 @ invalidate whole cache v3
- mov pc, lr
-
-/*
- * Various debugging routines for printing hex characters and
- * memory, which again must be relocatable.
- */
-#ifdef DEBUG
- .align 2
- .type phexbuf,#object
-phexbuf: .space 12
- .size phexbuf, .
- phexbuf - -@ phex corrupts {r0, r1, r2, r3} -phex: adr r3, phexbuf - mov r2, #0 - strb r2, [r3, r1] -1: subs r1, r1, #1 - movmi r0, r3 - bmi puts - and r2, r0, #15 - mov r0, r0, lsr #4 - cmp r2, #10 - addge r2, r2, #7 - add r2, r2, #'0' - strb r2, [r3, r1] - b 1b - -@ puts corrupts {r0, r1, r2, r3} -puts: loadsp r3, r2, r1 -1: ldrb r2, [r0], #1 - teq r2, #0 - moveq pc, lr -2: writeb r2, r3 - mov r1, #0x00020000 -3: subs r1, r1, #1 - bne 3b - teq r2, #'\n' - moveq r2, #'\r' - beq 2b - teq r0, #0 - bne 1b - mov pc, lr -@ putc corrupts {r0, r1, r2, r3} -putc: - mov r2, r0 - loadsp r3, r1, r0 - mov r0, #0 - b 2b - -@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr} -memdump: mov r12, r0 - mov r10, lr - mov r11, #0 -2: mov r0, r11, lsl #2 - add r0, r0, r12 - mov r1, #8 - bl phex - mov r0, #':' - bl putc -1: mov r0, #' ' - bl putc - ldr r0, [r12, r11, lsl #2] - mov r1, #8 - bl phex - and r0, r11, #7 - teq r0, #3 - moveq r0, #' ' - bleq putc - and r0, r11, #7 - add r11, r11, #1 - teq r0, #7 - bne 1b - mov r0, #'\n' - bl putc - cmp r11, #64 - blt 2b - mov pc, r10 -#endif - - .ltorg - -#ifdef CONFIG_ARM_VIRT_EXT -.align 5 -__hyp_reentry_vectors: - W(b) . @ reset - W(b) . @ undef - W(b) . @ svc - W(b) . @ pabort - W(b) . @ dabort - W(b) __enter_kernel @ hyp - W(b) . @ irq - W(b) . @ fiq -#endif /* CONFIG_ARM_VIRT_EXT */ - -__enter_kernel: - mov r0, #0 @ must be 0 - mov r1, r7 @ restore architecture number - mov r2, r8 @ restore atags pointer - ARM( mov pc, r4 ) @ call kernel - M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class - THUMB( bx r4 ) @ entry point is always ARM for A/R classes - -reloc_code_end: - -#ifdef CONFIG_EFI_STUB - .align 2 -_start: .long start - . - -ENTRY(efi_stub_entry) - @ allocate space on stack for passing current zImage address - @ and for the EFI stub to return of new entry point of - @ zImage, as EFI stub may copy the kernel. Pointer address - @ is passed in r2. r0 and r1 are passed through from the - @ EFI firmware to efi_entry - adr ip, _start - ldr r3, [ip] - add r3, r3, ip - stmfd sp!, {r3, lr} - mov r2, sp @ pass zImage address in r2 - bl efi_entry - - @ Check for error return from EFI stub. r0 has FDT address - @ or error code. - cmn r0, #1 - beq efi_load_fail - - @ Preserve return value of efi_entry() in r4 - mov r4, r0 - - @ our cache maintenance code relies on CP15 barrier instructions - @ but since we arrived here with the MMU and caches configured - @ by UEFI, we must check that the CP15BEN bit is set in SCTLR. - @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in - @ the enable path will be executed on v7+ only. - mrc p15, 0, r1, c1, c0, 0 @ read SCTLR - tst r1, #(1 << 5) @ CP15BEN bit set? - bne 0f - orr r1, r1, #(1 << 5) @ CP15 barrier instructions - mcr p15, 0, r1, c1, c0, 0 @ write SCTLR - ARM( .inst 0xf57ff06f @ v7+ isb ) - THUMB( isb ) - -0: bl cache_clean_flush - bl cache_off - - @ Set parameters for booting zImage according to boot protocol - @ put FDT address in r2, it was returned by efi_entry() - @ r1 is the machine type, and r0 needs to be 0 - mov r0, #0 - mov r1, #0xFFFFFFFF - mov r2, r4 - - @ Branch to (possibly) relocated zImage that is in [sp] - ldr lr, [sp] - ldr ip, =start_offset - add lr, lr, ip - mov pc, lr @ no mode switch - -efi_load_fail: - @ Return EFI_LOAD_ERROR to EFI firmware on error. 
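
Looking back at the hierarchical loop1/loop2/loop3 flush in the previous hunk: for each data or unified level named in CLIDR it reads CCSIDR and issues a clean+invalidate for every set/way pair. The field arithmetic, extracted into C as a sketch (the cp15 write is abstracted as a callback, and __builtin_clz is the GCC/Clang counterpart of the clz instruction used above):

#include <stdint.h>

static void clean_dcache_level(uint32_t level, uint32_t ccsidr,
                               void (*dccisw)(uint32_t))
{
    uint32_t line_shift = (ccsidr & 7) + 4;          /* log2(line bytes) */
    uint32_t max_way    = (ccsidr >> 3) & 0x3ff;     /* ways - 1         */
    uint32_t max_set    = (ccsidr >> 13) & 0x7fff;   /* sets - 1         */
    unsigned way_shift  = max_way ? __builtin_clz(max_way) : 0;

    for (uint32_t way = 0; way <= max_way; way++)
        for (uint32_t set = 0; set <= max_set; set++)
            /* set/way operand: way, set and level each in their field */
            dccisw((way << way_shift) | (set << line_shift) | (level << 1));
}
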
- ldr r0, =0x80000001 - ldmfd sp!, {ip, pc} -ENDPROC(efi_stub_entry) -#endif - - .align - .section ".stack", "aw", %nobits -.L_user_stack: .space 4096 -.L_user_stack_end: diff --git a/arch/arm/boot/compressed/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S deleted file mode 100644 index 1ec8cb2898b1c368790d611371cbcf87701dc19b..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/ll_char_wr.S +++ /dev/null @@ -1,131 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ll_char_wr.S - * - * Copyright (C) 1995, 1996 Russell King. - * - * Speedups & 1bpp code (C) 1996 Philip Blundell & Russell King. - * - * 10-04-96 RMK Various cleanups & reduced register usage. - * 08-04-98 RMK Shifts re-ordered - */ - -@ Regs: [] = corruptible -@ {} = used -@ () = do not use - -#include -#include - .text - -LC0: .word LC0 - .word bytes_per_char_h - .word video_size_row - .word acorndata_8x8 - .word con_charconvtable - -/* - * r0 = ptr - * r1 = char - * r2 = white - */ -ENTRY(ll_write_char) - stmfd sp!, {r4 - r7, lr} -@ -@ Smashable regs: {r0 - r3}, [r4 - r7], (r8 - fp), [ip], (sp), [lr], (pc) -@ - /* - * calculate offset into character table - */ - mov r1, r1, lsl #3 - /* - * calculate offset required for each row. - */ - adr ip, LC0 - ldmia ip, {r3, r4, r5, r6, lr} - sub ip, ip, r3 - add r6, r6, ip - add lr, lr, ip - ldr r4, [r4, ip] - ldr r5, [r5, ip] - /* - * Go to resolution-dependent routine... - */ - cmp r4, #4 - blt Lrow1bpp - add r0, r0, r5, lsl #3 @ Move to bottom of character - orr r1, r1, #7 - ldrb r7, [r6, r1] - teq r4, #8 - beq Lrow8bpplp -@ -@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) -@ -Lrow4bpplp: - ldr r7, [lr, r7, lsl #2] - mul r7, r2, r7 - sub r1, r1, #1 @ avoid using r7 directly after - str r7, [r0, -r5]! - ldrb r7, [r6, r1] - ldr r7, [lr, r7, lsl #2] - mul r7, r2, r7 - tst r1, #7 @ avoid using r7 directly after - str r7, [r0, -r5]! 
- subne r1, r1, #1 - ldrbne r7, [r6, r1] - bne Lrow4bpplp - ldmfd sp!, {r4 - r7, pc} - -@ -@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc) -@ -Lrow8bpplp: - mov ip, r7, lsr #4 - ldr ip, [lr, ip, lsl #2] - mul r4, r2, ip - and ip, r7, #15 @ avoid r4 - ldr ip, [lr, ip, lsl #2] @ avoid r4 - mul ip, r2, ip @ avoid r4 - sub r1, r1, #1 @ avoid ip - sub r0, r0, r5 @ avoid ip - stmia r0, {r4, ip} - ldrb r7, [r6, r1] - mov ip, r7, lsr #4 - ldr ip, [lr, ip, lsl #2] - mul r4, r2, ip - and ip, r7, #15 @ avoid r4 - ldr ip, [lr, ip, lsl #2] @ avoid r4 - mul ip, r2, ip @ avoid r4 - tst r1, #7 @ avoid ip - sub r0, r0, r5 @ avoid ip - stmia r0, {r4, ip} - subne r1, r1, #1 - ldrbne r7, [r6, r1] - bne Lrow8bpplp - ldmfd sp!, {r4 - r7, pc} - -@ -@ Smashable regs: {r0 - r3}, [r4], {r5, r6}, [r7], (r8 - fp), [ip], (sp), [lr], (pc) -@ -Lrow1bpp: - add r6, r6, r1 - ldmia r6, {r4, r7} - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - mov r4, r4, lsr #8 - strb r4, [r0], r5 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - mov r7, r7, lsr #8 - strb r7, [r0], r5 - ldmfd sp!, {r4 - r7, pc} - - .bss -ENTRY(con_charconvtable) - .space 1024 diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S deleted file mode 100644 index 0284f84dcf38049f643c144d4f86dd76df88fabe..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/piggy.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc - .globl input_data -input_data: - .incbin "arch/arm/boot/compressed/piggy_data" - .globl input_data_end -input_data_end: diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S deleted file mode 100644 index 51b078604978b9f4296dca4c58819304ef464997..0000000000000000000000000000000000000000 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2000 Russell King - */ - -#ifdef CONFIG_CPU_ENDIAN_BE8 -#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \ - (((x) >> 8) & 0x0000ff00) | \ - (((x) << 8) & 0x00ff0000) | \ - (((x) << 24) & 0xff000000) ) -#else -#define ZIMAGE_MAGIC(x) (x) -#endif - -OUTPUT_ARCH(arm) -ENTRY(_start) -SECTIONS -{ - /DISCARD/ : { - *(.ARM.exidx*) - *(.ARM.extab*) - /* - * Discard any r/w data - this produces a link error if we have any, - * which is required for PIC decompression. Local data generates - * GOTOFF relocations, which prevents it being relocated independently - * of the text/got segments. - */ - *(.data) - } - - . = TEXT_START; - _text = .; - - .text : { - _start = .; - *(.start) - *(.text) - *(.text.*) - *(.fixup) - *(.gnu.warning) - *(.glue_7t) - *(.glue_7) - } - .table : ALIGN(4) { - _table_start = .; - LONG(ZIMAGE_MAGIC(4)) - LONG(ZIMAGE_MAGIC(0x5a534c4b)) - LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) - LONG(ZIMAGE_MAGIC(_kernel_bss_size)) - LONG(0) - _table_end = .; - } - .rodata : { - *(.rodata) - *(.rodata.*) - *(.data.rel.ro) - } - .piggydata : { - *(.piggydata) - __piggy_size_addr = . - 4; - } - - . = ALIGN(4); - _etext = .; - - .got.plt : { *(.got.plt) } - _got_start = .; - .got : { *(.got) } - _got_end = .; - - /* ensure the zImage file size is always a multiple of 64 bits */ - /* (without a dummy byte, ld just ignores the empty section) */ - .pad : { BYTE(0); . 
= ALIGN(8); } - -#ifdef CONFIG_EFI_STUB - .data : ALIGN(4096) { - __pecoff_data_start = .; - /* - * The EFI stub always executes from RAM, and runs strictly before the - * decompressor, so we can make an exception for its r/w data, and keep it - */ - *(.data.efistub) - __pecoff_data_end = .; - - /* - * PE/COFF mandates a file size which is a multiple of 512 bytes if the - * section size equals or exceeds 4 KB - */ - . = ALIGN(512); - } - __pecoff_data_rawsize = . - ADDR(.data); -#endif - - _edata = .; - - /* - * The image_end section appears after any additional loadable sections - * that the linker may decide to insert in the binary image. Having - * this symbol allows further debug in the near future. - */ - .image_end (NOLOAD) : { - /* - * EFI requires that the image is aligned to 512 bytes, and appended - * DTB requires that we know where the end of the image is. Ensure - * that both are satisfied by ensuring that there are no additional - * sections emitted into the decompressor image. - */ - _edata_real = .; - } - - _magic_sig = ZIMAGE_MAGIC(0x016f2818); - _magic_start = ZIMAGE_MAGIC(_start); - _magic_end = ZIMAGE_MAGIC(_edata); - _magic_table = ZIMAGE_MAGIC(_table_start - _start); - - . = BSS_START; - __bss_start = .; - .bss : { *(.bss) } - _end = .; - - . = ALIGN(8); /* the stack must be 64-bit aligned */ - .stack : { *(.stack) } - - PROVIDE(__pecoff_data_size = ALIGN(512) - ADDR(.data)); - PROVIDE(__pecoff_end = ALIGN(512)); - - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } -} -ASSERT(_edata_real == _edata, "error: zImage file size is incorrect"); diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh old mode 100755 new mode 100644 diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts deleted file mode 120000 index c2f22fc3381107322545a350fa5b9620ba8647af..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts new file mode 100644 index 0000000000000000000000000000000000000000..c2f22fc3381107322545a350fa5b9620ba8647af --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v1.2.dts @@ -0,0 +1 @@ +sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts deleted file mode 120000 index c2f22fc3381107322545a350fa5b9620ba8647af..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts new file mode 100644 index 0000000000000000000000000000000000000000..c2f22fc3381107322545a350fa5b9620ba8647af --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts @@ -0,0 +1 @@ +sun8i-a23-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts deleted file mode 120000 index 4519fd791a8f9077bfb769c88027b0b0df47f627..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts 
+++ /dev/null @@ -1 +0,0 @@ -sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts new file mode 100644 index 0000000000000000000000000000000000000000..4519fd791a8f9077bfb769c88027b0b0df47f627 --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a33-et-q8-v1.6.dts @@ -0,0 +1 @@ +sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts deleted file mode 120000 index 4519fd791a8f9077bfb769c88027b0b0df47f627..0000000000000000000000000000000000000000 --- a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts +++ /dev/null @@ -1 +0,0 @@ -sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts new file mode 100644 index 0000000000000000000000000000000000000000..4519fd791a8f9077bfb769c88027b0b0df47f627 --- /dev/null +++ b/arch/arm/boot/dts/sun8i-a33-ippo-q8h-v1.2.dts @@ -0,0 +1 @@ +sun8i-a33-q8-tablet.dts \ No newline at end of file diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S deleted file mode 100644 index 291d969bc719cdd77c74c5f3e1735f7d0fe56a1d..0000000000000000000000000000000000000000 --- a/arch/arm/common/mcpm_head.S +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM - * - * Created by: Nicolas Pitre, March 2012 - * Copyright: (C) 2012-2013 Linaro Limited - * - * Refer to Documentation/arm/cluster-pm-race-avoidance.rst - * for details of the synchronisation algorithms used here. - */ - -#include -#include -#include - -#include "vlock.h" - -.if MCPM_SYNC_CLUSTER_CPUS -.error "cpus must be the first member of struct mcpm_sync_struct" -.endif - - .macro pr_dbg string -#if defined(CONFIG_DEBUG_LL) && defined(DEBUG) - b 1901f -1902: .asciz "CPU" -1903: .asciz " cluster" -1904: .asciz ": \string" - .align -1901: adr r0, 1902b - bl printascii - mov r0, r9 - bl printhex2 - adr r0, 1903b - bl printascii - mov r0, r10 - bl printhex2 - adr r0, 1904b - bl printascii -#endif - .endm - - .arm - .align - -ENTRY(mcpm_entry_point) - - ARM_BE8(setend be) - THUMB( badr r12, 1f ) - THUMB( bx r12 ) - THUMB( .thumb ) -1: - mrc p15, 0, r0, c0, c0, 5 @ MPIDR - ubfx r9, r0, #0, #8 @ r9 = cpu - ubfx r10, r0, #8, #8 @ r10 = cluster - mov r3, #MAX_CPUS_PER_CLUSTER - mla r4, r3, r10, r9 @ r4 = canonical CPU index - cmp r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS) - blo 2f - - /* We didn't expect this CPU. Try to cheaply make it quiet. */ -1: wfi - wfe - b 1b - -2: pr_dbg "kernel mcpm_entry_point\n" - - /* - * MMU is off so we need to get to various variables in a - * position independent way. 
- */ - adr r5, 3f - ldmia r5, {r0, r6, r7, r8, r11} - add r0, r5, r0 @ r0 = mcpm_entry_early_pokes - add r6, r5, r6 @ r6 = mcpm_entry_vectors - ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys - add r8, r5, r8 @ r8 = mcpm_sync - add r11, r5, r11 @ r11 = first_man_locks - - @ Perform an early poke, if any - add r0, r0, r4, lsl #3 - ldmia r0, {r0, r1} - teq r0, #0 - strne r1, [r0] - - mov r0, #MCPM_SYNC_CLUSTER_SIZE - mla r8, r0, r10, r8 @ r8 = sync cluster base - - @ Signal that this CPU is coming UP: - mov r0, #CPU_COMING_UP - mov r5, #MCPM_SYNC_CPU_SIZE - mla r5, r9, r5, r8 @ r5 = sync cpu address - strb r0, [r5] - - @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN - @ state, because there is at least one active CPU (this CPU). - - mov r0, #VLOCK_SIZE - mla r11, r0, r10, r11 @ r11 = cluster first man lock - mov r0, r11 - mov r1, r9 @ cpu - bl vlock_trylock @ implies DMB - - cmp r0, #0 @ failed to get the lock? - bne mcpm_setup_wait @ wait for cluster setup if so - - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_UP @ cluster already up? - bne mcpm_setup @ if not, set up the cluster - - @ Otherwise, release the first man lock and skip setup: - mov r0, r11 - bl vlock_unlock - b mcpm_setup_complete - -mcpm_setup: - @ Control dependency implies strb not observable before previous ldrb. - - @ Signal that the cluster is being brought up: - mov r0, #INBOUND_COMING_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] - dmb - - @ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this - @ point onwards will observe INBOUND_COMING_UP and abort. - - @ Wait for any previously-pending cluster teardown operations to abort - @ or complete: -mcpm_teardown_wait: - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_GOING_DOWN - bne first_man_setup - wfe - b mcpm_teardown_wait - -first_man_setup: - dmb - - @ If the outbound gave up before teardown started, skip cluster setup: - - cmp r0, #CLUSTER_UP - beq mcpm_setup_leave - - @ power_up_setup is now responsible for setting up the cluster: - - cmp r7, #0 - mov r0, #1 @ second (cluster) affinity level - blxne r7 @ Call power_up_setup if defined - dmb - - mov r0, #CLUSTER_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - dmb - -mcpm_setup_leave: - @ Leave the cluster setup critical section: - - mov r0, #INBOUND_NOT_COMING_UP - strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] - dsb st - sev - - mov r0, r11 - bl vlock_unlock @ implies DMB - b mcpm_setup_complete - - @ In the contended case, non-first men wait here for cluster setup - @ to complete: -mcpm_setup_wait: - ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] - cmp r0, #CLUSTER_UP - wfene - bne mcpm_setup_wait - dmb - -mcpm_setup_complete: - @ If a platform-specific CPU setup hook is needed, it is - @ called from here. - - cmp r7, #0 - mov r0, #0 @ first (CPU) affinity level - blxne r7 @ Call power_up_setup if defined - dmb - - @ Mark the CPU as up: - - mov r0, #CPU_UP - strb r0, [r5] - - @ Observability order of CPU_UP and opening of the gate does not matter. - -mcpm_entry_gated: - ldr r5, [r6, r4, lsl #2] @ r5 = CPU entry vector - cmp r5, #0 - wfeeq - beq mcpm_entry_gated - dmb - - pr_dbg "released\n" - bx r5 - - .align 2 - -3: .word mcpm_entry_early_pokes - . 
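The literal pool split across this point (.word symbol - . above, .word symbol - 3b below) stores link-time offsets rather than absolute addresses: with the MMU off, the code may execute at a physical address different from its link address, so the entry code rebuilds each pointer by adding the pool's own run-time address (obtained with adr) to the stored offset. The same computation in C, with names invented for illustration:

    /* pool[i] holds 'symbol - pool', so run-time address = pool base + pool[i] */
    static inline void *pic_resolve_sketch(const long *pool, int i)
    {
        return (char *)pool + pool[i];
    }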
- .word mcpm_entry_vectors - 3b - .word mcpm_power_up_setup_phys - 3b - .word mcpm_sync - 3b - .word first_man_locks - 3b - -ENDPROC(mcpm_entry_point) - - .bss - - .align CACHE_WRITEBACK_ORDER - .type first_man_locks, #object -first_man_locks: - .space VLOCK_SIZE * MAX_NR_CLUSTERS - .align CACHE_WRITEBACK_ORDER - - .type mcpm_entry_vectors, #object -ENTRY(mcpm_entry_vectors) - .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER - - .type mcpm_entry_early_pokes, #object -ENTRY(mcpm_entry_early_pokes) - .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER - - .type mcpm_power_up_setup_phys, #object -ENTRY(mcpm_power_up_setup_phys) - .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/common/secure_cntvoff.S b/arch/arm/common/secure_cntvoff.S deleted file mode 100644 index 53fc7bdb6c2e191440cce900818e2a6fffb687e5..0000000000000000000000000000000000000000 --- a/arch/arm/common/secure_cntvoff.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014 Renesas Electronics Corporation - * - * Initialization of CNTVOFF register from secure mode - * - */ - -#include -#include - -ENTRY(secure_cntvoff_init) - .arch armv7-a - /* - * CNTVOFF has to be initialized either from non-secure Hypervisor - * mode or secure Monitor mode with SCR.NS==1. If TrustZone is enabled - * then it should be handled by the secure code. The CPU must implement - * the virtualization extensions. - */ - cps #MON_MODE - mrc p15, 0, r1, c1, c1, 0 /* Get Secure Config */ - orr r0, r1, #1 - mcr p15, 0, r0, c1, c1, 0 /* Set Non Secure bit */ - isb - mov r0, #0 - mcrr p15, 4, r0, r0, c14 /* CNTVOFF = 0 */ - isb - mcr p15, 0, r1, c1, c1, 0 /* Set Secure bit */ - isb - cps #SVC_MODE - ret lr -ENDPROC(secure_cntvoff_init) diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S deleted file mode 100644 index f1c7fd44f1b10bc7f08787765e4e57f94e0789de..0000000000000000000000000000000000000000 --- a/arch/arm/common/vlock.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * vlock.S - simple voting lock implementation for ARM - * - * Created by: Dave Martin, 2012-08-16 - * Copyright: (C) 2012-2013 Linaro Limited - * - * This algorithm is described in more detail in - * Documentation/arm/vlocks.rst. - */ - -#include -#include "vlock.h" - -/* Select different code if voting flags can fit in a single word. */ -#if VLOCK_VOTING_SIZE > 4 -#define FEW(x...) -#define MANY(x...) x -#else -#define FEW(x...) x -#define MANY(x...) -#endif - -@ voting lock for first-man coordination - -.macro voting_begin rbase:req, rcpu:req, rscratch:req - mov \rscratch, #1 - strb \rscratch, [\rbase, \rcpu] - dmb -.endm - -.macro voting_end rbase:req, rcpu:req, rscratch:req - dmb - mov \rscratch, #0 - strb \rscratch, [\rbase, \rcpu] - dsb st - sev -.endm - -/* - * The vlock structure must reside in Strongly-Ordered or Device memory. - * This implementation deliberately eliminates most of the barriers which - * would be required for other memory types, and assumes that independent - * writes to neighbouring locations within a cacheline do not interfere - * with one another. - */ - -@ r0: lock structure base -@ r1: CPU ID (0-based index within cluster) -ENTRY(vlock_trylock) - add r1, r1, #VLOCK_VOTING_OFFSET - - voting_begin r0, r1, r2 - - ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held - cmp r2, #VLOCK_OWNER_NONE - bne trylock_fail @ fail if so - - @ Control dependency implies strb not observable before previous ldrb. 
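Before the vote submission below, it may help to see the whole trylock protocol in one place. A C rendering of the algorithm from Documentation/arm/vlocks.rst (the types, OWNER_NONE value, and barrier macros are illustrative stand-ins; the real code also biases CPU numbers by VLOCK_VOTING_OFFSET so that zero can mean "unowned"):

    #define DMB_SKETCH()      __sync_synchronize()  /* stands in for dmb */
    #define DSB_SEV_SKETCH()  __sync_synchronize()  /* stands in for dsb st; sev */
    #define OWNER_NONE_SKETCH 0

    /* Returns 0 if this CPU took the lock, nonzero otherwise. */
    static int vlock_trylock_sketch(volatile unsigned char *owner,
                                    volatile unsigned char *voting,
                                    unsigned ncpus, unsigned char me)
    {
        voting[me] = 1;                         /* voting_begin */
        DMB_SKETCH();
        if (*owner != OWNER_NONE_SKETCH) {      /* lock held: withdraw */
            voting[me] = 0;                     /* voting_end */
            DSB_SEV_SKETCH();
            return 1;
        }
        *owner = me;                            /* submit my vote */
        voting[me] = 0;                         /* voting_end */
        DSB_SEV_SKETCH();
        for (unsigned i = 0; i < ncpus; i++)    /* wait for the round to end */
            while (voting[i])
                ;                               /* wfe in the real code */
        DMB_SKETCH();
        return *owner != me;                    /* zero iff my vote stuck */
    }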
- - strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote - - voting_end r0, r1, r2 @ implies DMB - - @ Wait for the current round of voting to finish: - - MANY( mov r3, #VLOCK_VOTING_OFFSET ) -0: - MANY( ldr r2, [r0, r3] ) - FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] ) - cmp r2, #0 - wfene - bne 0b - MANY( add r3, r3, #4 ) - MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE ) - MANY( bne 0b ) - - @ Check who won: - - dmb - ldrb r2, [r0, #VLOCK_OWNER_OFFSET] - eor r0, r1, r2 @ zero if I won, else nonzero - bx lr - -trylock_fail: - voting_end r0, r1, r2 - mov r0, #1 @ nonzero indicates that I lost - bx lr -ENDPROC(vlock_trylock) - -@ r0: lock structure base -ENTRY(vlock_unlock) - dmb - mov r1, #VLOCK_OWNER_NONE - strb r1, [r0, #VLOCK_OWNER_OFFSET] - dsb st - sev - bx lr -ENDPROC(vlock_unlock) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S deleted file mode 100644 index 312428d83eedb2aacd5380bd1e1d196133668ee6..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-ce-core.S +++ /dev/null @@ -1,713 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a - .fpu crypto-neon-fp-armv8 - .align 3 - - .macro enc_round, state, key - aese.8 \state, \key - aesmc.8 \state, \state - .endm - - .macro dec_round, state, key - aesd.8 \state, \key - aesimc.8 \state, \state - .endm - - .macro enc_dround, key1, key2 - enc_round q0, \key1 - enc_round q0, \key2 - .endm - - .macro dec_dround, key1, key2 - dec_round q0, \key1 - dec_round q0, \key2 - .endm - - .macro enc_fround, key1, key2, key3 - enc_round q0, \key1 - aese.8 q0, \key2 - veor q0, q0, \key3 - .endm - - .macro dec_fround, key1, key2, key3 - dec_round q0, \key1 - aesd.8 q0, \key2 - veor q0, q0, \key3 - .endm - - .macro enc_dround_4x, key1, key2 - enc_round q0, \key1 - enc_round q1, \key1 - enc_round q2, \key1 - enc_round q3, \key1 - enc_round q0, \key2 - enc_round q1, \key2 - enc_round q2, \key2 - enc_round q3, \key2 - .endm - - .macro dec_dround_4x, key1, key2 - dec_round q0, \key1 - dec_round q1, \key1 - dec_round q2, \key1 - dec_round q3, \key1 - dec_round q0, \key2 - dec_round q1, \key2 - dec_round q2, \key2 - dec_round q3, \key2 - .endm - - .macro enc_fround_4x, key1, key2, key3 - enc_round q0, \key1 - enc_round q1, \key1 - enc_round q2, \key1 - enc_round q3, \key1 - aese.8 q0, \key2 - aese.8 q1, \key2 - aese.8 q2, \key2 - aese.8 q3, \key2 - veor q0, q0, \key3 - veor q1, q1, \key3 - veor q2, q2, \key3 - veor q3, q3, \key3 - .endm - - .macro dec_fround_4x, key1, key2, key3 - dec_round q0, \key1 - dec_round q1, \key1 - dec_round q2, \key1 - dec_round q3, \key1 - aesd.8 q0, \key2 - aesd.8 q1, \key2 - aesd.8 q2, \key2 - aesd.8 q3, \key2 - veor q0, q0, \key3 - veor q1, q1, \key3 - veor q2, q2, \key3 - veor q3, q3, \key3 - .endm - - .macro do_block, dround, fround - cmp r3, #12 @ which key size? - vld1.32 {q10-q11}, [ip]! - \dround q8, q9 - vld1.32 {q12-q13}, [ip]! - \dround q10, q11 - vld1.32 {q10-q11}, [ip]! - \dround q12, q13 - vld1.32 {q12-q13}, [ip]! - \dround q10, q11 - blo 0f @ AES-128: 10 rounds - vld1.32 {q10-q11}, [ip]! - \dround q12, q13 - beq 1f @ AES-192: 12 rounds - vld1.32 {q12-q13}, [ip] - \dround q10, q11 -0: \fround q12, q13, q14 - bx lr - -1: \fround q10, q11, q14 - bx lr - .endm - - /* - * Internal, non-AAPCS compliant functions that implement the core AES - * transforms. 
These should preserve all registers except q0 - q2 and ip - * Arguments: - * q0 : first in/output block - * q1 : second in/output block (_4x version only) - * q2 : third in/output block (_4x version only) - * q3 : fourth in/output block (_4x version only) - * q8 : first round key - * q9 : second round key - * q14 : final round key - * r2 : address of round key array - * r3 : number of rounds - */ - .align 6 -aes_encrypt: - add ip, r2, #32 @ 3rd round key -.Laes_encrypt_tweak: - do_block enc_dround, enc_fround -ENDPROC(aes_encrypt) - - .align 6 -aes_decrypt: - add ip, r2, #32 @ 3rd round key - do_block dec_dround, dec_fround -ENDPROC(aes_decrypt) - - .align 6 -aes_encrypt_4x: - add ip, r2, #32 @ 3rd round key - do_block enc_dround_4x, enc_fround_4x -ENDPROC(aes_encrypt_4x) - - .align 6 -aes_decrypt_4x: - add ip, r2, #32 @ 3rd round key - do_block dec_dround_4x, dec_fround_4x -ENDPROC(aes_decrypt_4x) - - .macro prepare_key, rk, rounds - add ip, \rk, \rounds, lsl #4 - vld1.32 {q8-q9}, [\rk] @ load first 2 round keys - vld1.32 {q14}, [ip] @ load last round key - .endm - - /* - * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks) - * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks) - */ -ENTRY(ce_aes_ecb_encrypt) - push {r4, lr} - ldr r4, [sp, #8] - prepare_key r2, r3 -.Lecbencloop4x: - subs r4, r4, #4 - bmi .Lecbenc1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - bl aes_encrypt_4x - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lecbencloop4x -.Lecbenc1x: - adds r4, r4, #4 - beq .Lecbencout -.Lecbencloop: - vld1.8 {q0}, [r1]! - bl aes_encrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lecbencloop -.Lecbencout: - pop {r4, pc} -ENDPROC(ce_aes_ecb_encrypt) - -ENTRY(ce_aes_ecb_decrypt) - push {r4, lr} - ldr r4, [sp, #8] - prepare_key r2, r3 -.Lecbdecloop4x: - subs r4, r4, #4 - bmi .Lecbdec1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - bl aes_decrypt_4x - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lecbdecloop4x -.Lecbdec1x: - adds r4, r4, #4 - beq .Lecbdecout -.Lecbdecloop: - vld1.8 {q0}, [r1]! - bl aes_decrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lecbdecloop -.Lecbdecout: - pop {r4, pc} -ENDPROC(ce_aes_ecb_decrypt) - - /* - * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 iv[]) - */ -ENTRY(ce_aes_cbc_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q0}, [r5] - prepare_key r2, r3 -.Lcbcencloop: - vld1.8 {q1}, [r1]! @ get next pt block - veor q0, q0, q1 @ ..and xor with iv - bl aes_encrypt - vst1.8 {q0}, [r0]! - subs r4, r4, #1 - bne .Lcbcencloop - vst1.8 {q0}, [r5] - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_encrypt) - -ENTRY(ce_aes_cbc_decrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q15}, [r5] @ keep iv in q15 - prepare_key r2, r3 -.Lcbcdecloop4x: - subs r4, r4, #4 - bmi .Lcbcdec1x - vld1.8 {q0-q1}, [r1]! - vld1.8 {q2-q3}, [r1]! - vmov q4, q0 - vmov q5, q1 - vmov q6, q2 - vmov q7, q3 - bl aes_decrypt_4x - veor q0, q0, q15 - veor q1, q1, q4 - veor q2, q2, q5 - veor q3, q3, q6 - vmov q15, q7 - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! - b .Lcbcdecloop4x -.Lcbcdec1x: - adds r4, r4, #4 - beq .Lcbcdecout - vmov q6, q14 @ preserve last round key -.Lcbcdecloop: - vld1.8 {q0}, [r1]! @ get next ct block - veor q14, q15, q6 @ combine prev ct with last key - vmov q15, q0 - bl aes_decrypt - vst1.8 {q0}, [r0]!
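Per block, the CBC paths around this point implement the textbook recurrences C_i = E(P_i ^ C_{i-1}) and P_i = D(C_i) ^ C_{i-1}, with the IV acting as C_0. A compact C restatement of the encrypt side (aes_enc_block is a stand-in for the Crypto Extensions transform, not a real kernel symbol):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t block[16]);   /* assumed helper: E_k in place */

    static void xor16(uint8_t *d, const uint8_t *s)
    {
        for (int i = 0; i < 16; i++)
            d[i] ^= s[i];
    }

    /* iv enters as C_0 and leaves holding the last ciphertext block */
    static void cbc_encrypt_sketch(uint8_t *out, const uint8_t *in,
                                   size_t blocks, uint8_t iv[16])
    {
        while (blocks--) {
            xor16(iv, in);           /* P_i ^ C_{i-1} */
            aes_enc_block(iv);       /* C_i */
            memcpy(out, iv, 16);
            in += 16;
            out += 16;
        }
    }

Decryption XORs after the block transform instead, which is why the 4x path above first saves the ciphertexts in q4-q7: each plaintext needs the previous ciphertext, not the previous plaintext.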
- subs r4, r4, #1 - bne .Lcbcdecloop -.Lcbcdecout: - vst1.8 {q15}, [r5] @ keep iv in q15 - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_decrypt) - - - /* - * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - */ - -ENTRY(ce_aes_cbc_cts_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - sub r4, r4, #16 - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - vld1.8 {q5}, [ip] - vld1.8 {q6}, [lr] - - add ip, r1, r4 - vld1.8 {q0}, [r1] @ overlapping loads - vld1.8 {q3}, [ip] - - vld1.8 {q1}, [r5] @ get iv - prepare_key r2, r3 - - veor q0, q0, q1 @ xor with iv - bl aes_encrypt - - vtbl.8 d4, {d0-d1}, d10 - vtbl.8 d5, {d0-d1}, d11 - vtbl.8 d2, {d6-d7}, d12 - vtbl.8 d3, {d6-d7}, d13 - - veor q0, q0, q1 - bl aes_encrypt - - add r4, r0, r4 - vst1.8 {q2}, [r4] @ overlapping stores - vst1.8 {q0}, [r0] - - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_cts_encrypt) - -ENTRY(ce_aes_cbc_cts_decrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - sub r4, r4, #16 - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - vld1.8 {q5}, [ip] - vld1.8 {q6}, [lr] - - add ip, r1, r4 - vld1.8 {q0}, [r1] @ overlapping loads - vld1.8 {q1}, [ip] - - vld1.8 {q3}, [r5] @ get iv - prepare_key r2, r3 - - bl aes_decrypt - - vtbl.8 d4, {d0-d1}, d10 - vtbl.8 d5, {d0-d1}, d11 - vtbx.8 d0, {d2-d3}, d12 - vtbx.8 d1, {d2-d3}, d13 - - veor q1, q1, q2 - bl aes_decrypt - veor q0, q0, q3 @ xor with iv - - add r4, r0, r4 - vst1.8 {q1}, [r4] @ overlapping stores - vst1.8 {q0}, [r0] - - pop {r4-r6, pc} -ENDPROC(ce_aes_cbc_cts_decrypt) - - - /* - * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 ctr[]) - */ -ENTRY(ce_aes_ctr_encrypt) - push {r4-r6, lr} - ldrd r4, r5, [sp, #16] - vld1.8 {q7}, [r5] @ load ctr - prepare_key r2, r3 - vmov r6, s31 @ keep swabbed ctr in r6 - rev r6, r6 - cmn r6, r4 @ 32 bit overflow? - bcs .Lctrloop -.Lctrloop4x: - subs r4, r4, #4 - bmi .Lctr1x - - /* - * NOTE: the sequence below has been carefully tweaked to avoid - * a silicon erratum that exists in Cortex-A57 (#1742098) and - * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs - * may produce an incorrect result if they take their input from a - * register of which a single 32-bit lane has been updated the last - * time it was modified. To work around this, the lanes of registers - * q0-q3 below are not manipulated individually, and the different - * counter values are prepared by successive manipulations of q7. - */ - add ip, r6, #1 - vmov q0, q7 - rev ip, ip - add lr, r6, #2 - vmov s31, ip @ set lane 3 of q1 via q7 - add ip, r6, #3 - rev lr, lr - vmov q1, q7 - vmov s31, lr @ set lane 3 of q2 via q7 - rev ip, ip - vmov q2, q7 - vmov s31, ip @ set lane 3 of q3 via q7 - add r6, r6, #4 - vmov q3, q7 - - vld1.8 {q4-q5}, [r1]! - vld1.8 {q6}, [r1]! - vld1.8 {q15}, [r1]! - bl aes_encrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q15 - rev ip, r6 - vst1.8 {q0-q1}, [r0]! - vst1.8 {q2-q3}, [r0]! 
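The counter handling that continues below treats q7 as a 128-bit big-endian value; .Lctrcarry propagates an increment into the upper words only when the low 32 bits wrap. Byte-wise, the operation is simply the following (a C sketch; the assembly works in byte-reversed 32-bit words for speed):

    /* Increment a 128-bit big-endian counter in place. */
    static void be128_increment_sketch(unsigned char ctr[16])
    {
        for (int i = 15; i >= 0; i--)
            if (++ctr[i] != 0)   /* stop at the first byte that didn't wrap */
                break;
    }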
- vmov s31, ip - b .Lctrloop4x -.Lctr1x: - adds r4, r4, #4 - beq .Lctrout -.Lctrloop: - vmov q0, q7 - bl aes_encrypt - - adds r6, r6, #1 @ increment BE ctr - rev ip, r6 - vmov s31, ip - bcs .Lctrcarry - -.Lctrcarrydone: - subs r4, r4, #1 - bmi .Lctrtailblock @ blocks < 0 means tail block - vld1.8 {q3}, [r1]! - veor q3, q0, q3 - vst1.8 {q3}, [r0]! - bne .Lctrloop - -.Lctrout: - vst1.8 {q7}, [r5] @ return next CTR value - pop {r4-r6, pc} - -.Lctrtailblock: - vst1.8 {q0}, [r0, :64] @ return the key stream - b .Lctrout - -.Lctrcarry: - .irp sreg, s30, s29, s28 - vmov ip, \sreg @ load next word of ctr - rev ip, ip @ ... to handle the carry - adds ip, ip, #1 - rev ip, ip - vmov \sreg, ip - bcc .Lctrcarrydone - .endr - b .Lctrcarrydone -ENDPROC(ce_aes_ctr_encrypt) - - /* - * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int bytes, u8 iv[], u32 const rk2[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, - * int bytes, u8 iv[], u32 const rk2[], int first) - */ - - .macro next_tweak, out, in, const, tmp - vshr.s64 \tmp, \in, #63 - vand \tmp, \tmp, \const - vadd.u64 \out, \in, \in - vext.8 \tmp, \tmp, \tmp, #8 - veor \out, \out, \tmp - .endm - -ce_aes_xts_init: - vmov.i32 d30, #0x87 @ compose tweak mask vector - vmovl.u32 q15, d30 - vshr.u64 d30, d31, #7 - - ldrd r4, r5, [sp, #16] @ load args - ldr r6, [sp, #28] - vld1.8 {q0}, [r5] @ load iv - teq r6, #1 @ start of a block? - bxne lr - - @ Encrypt the IV in q0 with the second AES key. This should only - @ be done at the start of a block. - ldr r6, [sp, #24] @ load AES key 2 - prepare_key r6, r3 - add ip, r6, #32 @ 3rd round key of key 2 - b .Laes_encrypt_tweak @ tail call -ENDPROC(ce_aes_xts_init) - -ENTRY(ce_aes_xts_encrypt) - push {r4-r6, lr} - - bl ce_aes_xts_init @ run shared prologue - prepare_key r2, r3 - vmov q4, q0 - - teq r6, #0 @ start of a block? - bne .Lxtsenc4x - -.Lxtsencloop4x: - next_tweak q4, q4, q15, q10 -.Lxtsenc4x: - subs r4, r4, #64 - bmi .Lxtsenc1x - vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks - vld1.8 {q2-q3}, [r1]! - next_tweak q5, q4, q15, q10 - veor q0, q0, q4 - next_tweak q6, q5, q15, q10 - veor q1, q1, q5 - next_tweak q7, q6, q15, q10 - veor q2, q2, q6 - veor q3, q3, q7 - bl aes_encrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q7 - vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks - vst1.8 {q2-q3}, [r0]! - vmov q4, q7 - teq r4, #0 - beq .Lxtsencret - b .Lxtsencloop4x -.Lxtsenc1x: - adds r4, r4, #64 - beq .Lxtsencout - subs r4, r4, #16 - bmi .LxtsencctsNx -.Lxtsencloop: - vld1.8 {q0}, [r1]! -.Lxtsencctsout: - veor q0, q0, q4 - bl aes_encrypt - veor q0, q0, q4 - teq r4, #0 - beq .Lxtsencout - subs r4, r4, #16 - next_tweak q4, q4, q15, q6 - bmi .Lxtsenccts - vst1.8 {q0}, [r0]! 
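The next_tweak macro used throughout this routine advances the XTS tweak one block position: T_{i+1} = T_i * x in GF(2^128), i.e. shift the 128-bit value left by one bit (little-endian convention) and, when a bit falls off the top, XOR the low byte with 0x87, the tail of the reduction polynomial. A byte-wise C equivalent (the NEON code achieves the same with a 64-bit shift pair and the mask vector kept in q15):

    /* Multiply the XTS tweak by x in GF(2^128), little-endian convention. */
    static void xts_next_tweak_sketch(unsigned char t[16])
    {
        unsigned carry = 0;
        for (int i = 0; i < 16; i++) {
            unsigned b = t[i];
            t[i] = (unsigned char)((b << 1) | carry);
            carry = b >> 7;
        }
        if (carry)
            t[0] ^= 0x87;
    }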
- b .Lxtsencloop -.Lxtsencout: - vst1.8 {q0}, [r0] -.Lxtsencret: - vst1.8 {q4}, [r5] - pop {r4-r6, pc} - -.LxtsencctsNx: - vmov q0, q3 - sub r0, r0, #16 -.Lxtsenccts: - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - - add r1, r1, r4 @ rewind input pointer - add r4, r4, #16 @ # bytes in final block - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - add r4, r0, r4 @ output address of final block - - vld1.8 {q1}, [r1] @ load final partial block - vld1.8 {q2}, [ip] - vld1.8 {q3}, [lr] - - vtbl.8 d4, {d0-d1}, d4 - vtbl.8 d5, {d0-d1}, d5 - vtbx.8 d0, {d2-d3}, d6 - vtbx.8 d1, {d2-d3}, d7 - - vst1.8 {q2}, [r4] @ overlapping stores - mov r4, #0 - b .Lxtsencctsout -ENDPROC(ce_aes_xts_encrypt) - - -ENTRY(ce_aes_xts_decrypt) - push {r4-r6, lr} - - bl ce_aes_xts_init @ run shared prologue - prepare_key r2, r3 - vmov q4, q0 - - /* subtract 16 bytes if we are doing CTS */ - tst r4, #0xf - subne r4, r4, #0x10 - - teq r6, #0 @ start of a block? - bne .Lxtsdec4x - -.Lxtsdecloop4x: - next_tweak q4, q4, q15, q10 -.Lxtsdec4x: - subs r4, r4, #64 - bmi .Lxtsdec1x - vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks - vld1.8 {q2-q3}, [r1]! - next_tweak q5, q4, q15, q10 - veor q0, q0, q4 - next_tweak q6, q5, q15, q10 - veor q1, q1, q5 - next_tweak q7, q6, q15, q10 - veor q2, q2, q6 - veor q3, q3, q7 - bl aes_decrypt_4x - veor q0, q0, q4 - veor q1, q1, q5 - veor q2, q2, q6 - veor q3, q3, q7 - vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks - vst1.8 {q2-q3}, [r0]! - vmov q4, q7 - teq r4, #0 - beq .Lxtsdecout - b .Lxtsdecloop4x -.Lxtsdec1x: - adds r4, r4, #64 - beq .Lxtsdecout - subs r4, r4, #16 -.Lxtsdecloop: - vld1.8 {q0}, [r1]! - bmi .Lxtsdeccts -.Lxtsdecctsout: - veor q0, q0, q4 - bl aes_decrypt - veor q0, q0, q4 - vst1.8 {q0}, [r0]! - teq r4, #0 - beq .Lxtsdecout - subs r4, r4, #16 - next_tweak q4, q4, q15, q6 - b .Lxtsdecloop -.Lxtsdecout: - vst1.8 {q4}, [r5] - pop {r4-r6, pc} - -.Lxtsdeccts: - movw ip, :lower16:.Lcts_permute_table - movt ip, :upper16:.Lcts_permute_table - - add r1, r1, r4 @ rewind input pointer - add r4, r4, #16 @ # bytes in final block - add lr, ip, #32 - add ip, ip, r4 - sub lr, lr, r4 - add r4, r0, r4 @ output address of final block - - next_tweak q5, q4, q15, q6 - - vld1.8 {q1}, [r1] @ load final partial block - vld1.8 {q2}, [ip] - vld1.8 {q3}, [lr] - - veor q0, q0, q5 - bl aes_decrypt - veor q0, q0, q5 - - vtbl.8 d4, {d0-d1}, d4 - vtbl.8 d5, {d0-d1}, d5 - vtbx.8 d0, {d2-d3}, d6 - vtbx.8 d1, {d2-d3}, d7 - - vst1.8 {q2}, [r4] @ overlapping stores - mov r4, #0 - b .Lxtsdecctsout -ENDPROC(ce_aes_xts_decrypt) - - /* - * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the - * AES sbox substitution on each byte in - * 'input' - */ -ENTRY(ce_aes_sub) - vdup.32 q1, r0 - veor q0, q0, q0 - aese.8 q0, q1 - vmov r0, s0 - bx lr -ENDPROC(ce_aes_sub) - - /* - * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns - * operation on round key *src - */ -ENTRY(ce_aes_invert) - vld1.32 {q0}, [r1] - aesimc.8 q0, q0 - vst1.32 {q0}, [r0] - bx lr -ENDPROC(ce_aes_invert) - - .section ".rodata", "a" - .align 6 -.Lcts_permute_table: - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S deleted file mode 100644 index 
472e56d09eeae6d4f4fd4211960c49750bd4c649..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-cipher-core.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Scalar AES core transform - * - * Copyright (C) 2017 Linaro Ltd. - * Author: Ard Biesheuvel - */ - -#include -#include -#include - - .text - .align 5 - - rk .req r0 - rounds .req r1 - in .req r2 - out .req r3 - ttab .req ip - - t0 .req lr - t1 .req r2 - t2 .req r3 - - .macro __select, out, in, idx - .if __LINUX_ARM_ARCH__ < 7 - and \out, \in, #0xff << (8 * \idx) - .else - ubfx \out, \in, #(8 * \idx), #8 - .endif - .endm - - .macro __load, out, in, idx, sz, op - .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 - ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz] - .else - ldr\op \out, [ttab, \in, lsl #\sz] - .endif - .endm - - .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr - __select \out0, \in0, 0 - __select t0, \in1, 1 - __load \out0, \out0, 0, \sz, \op - __load t0, t0, 1, \sz, \op - - .if \enc - __select \out1, \in1, 0 - __select t1, \in2, 1 - .else - __select \out1, \in3, 0 - __select t1, \in0, 1 - .endif - __load \out1, \out1, 0, \sz, \op - __select t2, \in2, 2 - __load t1, t1, 1, \sz, \op - __load t2, t2, 2, \sz, \op - - eor \out0, \out0, t0, ror #24 - - __select t0, \in3, 3 - .if \enc - __select \t3, \in3, 2 - __select \t4, \in0, 3 - .else - __select \t3, \in1, 2 - __select \t4, \in2, 3 - .endif - __load \t3, \t3, 2, \sz, \op - __load t0, t0, 3, \sz, \op - __load \t4, \t4, 3, \sz, \op - - .ifnb \oldcpsr - /* - * This is the final round and we're done with all data-dependent table - * lookups, so we can safely re-enable interrupts. - */ - restore_irqs \oldcpsr - .endif - - eor \out1, \out1, t1, ror #24 - eor \out0, \out0, t2, ror #16 - ldm rk!, {t1, t2} - eor \out1, \out1, \t3, ror #16 - eor \out0, \out0, t0, ror #8 - eor \out1, \out1, \t4, ror #8 - eor \out0, \out0, t1 - eor \out1, \out1, t2 - .endm - - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr - .endm - - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr - .endm - - .macro __rev, out, in - .if __LINUX_ARM_ARCH__ < 6 - lsl t0, \in, #24 - and t1, \in, #0xff00 - and t2, \in, #0xff0000 - orr \out, t0, \in, lsr #24 - orr \out, \out, t1, lsl #8 - orr \out, \out, t2, lsr #8 - .else - rev \out, \in - .endif - .endm - - .macro __adrl, out, sym, c - .if __LINUX_ARM_ARCH__ < 7 - ldr\c \out, =\sym - .else - movw\c \out, #:lower16:\sym - movt\c \out, #:upper16:\sym - .endif - .endm - - .macro do_crypt, round, ttab, ltab, bsz - push {r3-r11, lr} - - // Load keys first, to reduce latency in case they're not cached yet. - ldm rk!, {r8-r11} - - ldr r4, [in] - ldr r5, [in, #4] - ldr r6, [in, #8] - ldr r7, [in, #12] - -#ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 -#endif - - eor r4, r4, r8 - eor r5, r5, r9 - eor r6, r6, r10 - eor r7, r7, r11 - - __adrl ttab, \ttab - /* - * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into - * L1 cache, assuming cacheline size >= 32. This is a hardening measure - * intended to make cache-timing attacks more difficult. 
They may not - * be fully prevented, however; see the paper - * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf - * ("Cache-timing attacks on AES") for a discussion of the many - * difficulties involved in writing truly constant-time AES software. - */ - save_and_disable_irqs t0 - .set i, 0 - .rept 1024 / 128 - ldr r8, [ttab, #i + 0] - ldr r9, [ttab, #i + 32] - ldr r10, [ttab, #i + 64] - ldr r11, [ttab, #i + 96] - .set i, i + 128 - .endr - push {t0} // oldcpsr - - tst rounds, #2 - bne 1f - -0: \round r8, r9, r10, r11, r4, r5, r6, r7 - \round r4, r5, r6, r7, r8, r9, r10, r11 - -1: subs rounds, rounds, #4 - \round r8, r9, r10, r11, r4, r5, r6, r7 - bls 2f - \round r4, r5, r6, r7, r8, r9, r10, r11 - b 0b - -2: .ifb \ltab - add ttab, ttab, #1 - .else - __adrl ttab, \ltab - // Prefetch inverse S-box for final round; see explanation above - .set i, 0 - .rept 256 / 64 - ldr t0, [ttab, #i + 0] - ldr t1, [ttab, #i + 32] - .set i, i + 64 - .endr - .endif - - pop {rounds} // oldcpsr - \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds - -#ifdef CONFIG_CPU_BIG_ENDIAN - __rev r4, r4 - __rev r5, r5 - __rev r6, r6 - __rev r7, r7 -#endif - - ldr out, [sp] - - str r4, [out] - str r5, [out, #4] - str r6, [out, #8] - str r7, [out, #12] - - pop {r3-r11, pc} - - .align 3 - .ltorg - .endm - -ENTRY(__aes_arm_encrypt) - do_crypt fround, crypto_ft_tab,, 2 -ENDPROC(__aes_arm_encrypt) - - .align 5 -ENTRY(__aes_arm_decrypt) - do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -ENDPROC(__aes_arm_decrypt) diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S deleted file mode 100644 index cfaed4e67535f5ef7453c76336b474cd1923868c..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/aes-neonbs-core.S +++ /dev/null @@ -1,1026 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Bit sliced AES using NEON instructions - * - * Copyright (C) 2017 Linaro Ltd. 
- * Author: Ard Biesheuvel - */ - -/* - * The algorithm implemented here is described in detail by the paper - * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and - * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) - * - * This implementation is based primarily on the OpenSSL implementation - * for 32-bit ARM written by Andy Polyakov - */ - -#include -#include - - .text - .fpu neon - - rounds .req ip - bskey .req r4 - - q0l .req d0 - q0h .req d1 - q1l .req d2 - q1h .req d3 - q2l .req d4 - q2h .req d5 - q3l .req d6 - q3h .req d7 - q4l .req d8 - q4h .req d9 - q5l .req d10 - q5h .req d11 - q6l .req d12 - q6h .req d13 - q7l .req d14 - q7h .req d15 - q8l .req d16 - q8h .req d17 - q9l .req d18 - q9h .req d19 - q10l .req d20 - q10h .req d21 - q11l .req d22 - q11h .req d23 - q12l .req d24 - q12h .req d25 - q13l .req d26 - q13h .req d27 - q14l .req d28 - q14h .req d29 - q15l .req d30 - q15h .req d31 - - .macro __tbl, out, tbl, in, tmp - .ifc \out, \tbl - .ifb \tmp - .error __tbl needs temp register if out == tbl - .endif - vmov \tmp, \out - .endif - vtbl.8 \out\()l, {\tbl}, \in\()l - .ifc \out, \tbl - vtbl.8 \out\()h, {\tmp}, \in\()h - .else - vtbl.8 \out\()h, {\tbl}, \in\()h - .endif - .endm - - .macro __ldr, out, sym - vldr \out\()l, \sym - vldr \out\()h, \sym + 8 - .endm - - .macro __adr, reg, lbl - adr \reg, \lbl -THUMB( orr \reg, \reg, #1 ) - .endm - - .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - veor \b2, \b2, \b1 - veor \b5, \b5, \b6 - veor \b3, \b3, \b0 - veor \b6, \b6, \b2 - veor \b5, \b5, \b0 - veor \b6, \b6, \b3 - veor \b3, \b3, \b7 - veor \b7, \b7, \b5 - veor \b3, \b3, \b4 - veor \b4, \b4, \b5 - veor \b2, \b2, \b7 - veor \b3, \b3, \b1 - veor \b1, \b1, \b5 - .endm - - .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - veor \b0, \b0, \b6 - veor \b1, \b1, \b4 - veor \b4, \b4, \b6 - veor \b2, \b2, \b0 - veor \b6, \b6, \b1 - veor \b1, \b1, \b5 - veor \b5, \b5, \b3 - veor \b3, \b3, \b7 - veor \b7, \b7, \b5 - veor \b2, \b2, \b5 - veor \b4, \b4, \b7 - .endm - - .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 - veor \b1, \b1, \b7 - veor \b4, \b4, \b7 - veor \b7, \b7, \b5 - veor \b1, \b1, \b3 - veor \b2, \b2, \b5 - veor \b3, \b3, \b7 - veor \b6, \b6, \b1 - veor \b2, \b2, \b0 - veor \b5, \b5, \b3 - veor \b4, \b4, \b6 - veor \b0, \b0, \b6 - veor \b1, \b1, \b4 - .endm - - .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 - veor \b1, \b1, \b5 - veor \b2, \b2, \b7 - veor \b3, \b3, \b1 - veor \b4, \b4, \b5 - veor \b7, \b7, \b5 - veor \b3, \b3, \b4 - veor \b5, \b5, \b0 - veor \b3, \b3, \b7 - veor \b6, \b6, \b2 - veor \b2, \b2, \b1 - veor \b6, \b6, \b3 - veor \b3, \b3, \b0 - veor \b5, \b5, \b6 - .endm - - .macro mul_gf4, x0, x1, y0, y1, t0, t1 - veor \t0, \y0, \y1 - vand \t0, \t0, \x0 - veor \x0, \x0, \x1 - vand \t1, \x1, \y0 - vand \x0, \x0, \y1 - veor \x1, \t1, \t0 - veor \x0, \x0, \t1 - .endm - - .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 - veor \t0, \y0, \y1 - veor \t1, \y2, \y3 - vand \t0, \t0, \x0 - vand \t1, \t1, \x2 - veor \x0, \x0, \x1 - veor \x2, \x2, \x3 - vand \x1, \x1, \y0 - vand \x3, \x3, \y2 - vand \x0, \x0, \y1 - vand \x2, \x2, \y3 - veor \x1, \x1, \x0 - veor \x2, \x2, \x3 - veor \x0, \x0, \t0 - veor \x3, \x3, \t1 - .endm - - .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, y1, y2, y3, t0, t1, t2, t3 - veor \t0, \x0, \x2 - veor \t1, \x1, \x3 - mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 - veor \y0, \y0, \y2 - veor \y1, \y1, \y3 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 - veor \x0, \x0, \t0 - veor \x2, \x2, \t0 - 
veor \x1, \x1, \t1 - veor \x3, \x3, \t1 - veor \t0, \x4, \x6 - veor \t1, \x5, \x7 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 - veor \y0, \y0, \y2 - veor \y1, \y1, \y3 - mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 - veor \x4, \x4, \t0 - veor \x6, \x6, \t0 - veor \x5, \x5, \t1 - veor \x7, \x7, \t1 - .endm - - .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - veor \t3, \x4, \x6 - veor \t0, \x5, \x7 - veor \t1, \x1, \x3 - veor \s1, \x7, \x6 - veor \s0, \x0, \x2 - veor \s3, \t3, \t0 - vorr \t2, \t0, \t1 - vand \s2, \t3, \s0 - vorr \t3, \t3, \s0 - veor \s0, \s0, \t1 - vand \t0, \t0, \t1 - veor \t1, \x3, \x2 - vand \s3, \s3, \s0 - vand \s1, \s1, \t1 - veor \t1, \x4, \x5 - veor \s0, \x1, \x0 - veor \t3, \t3, \s1 - veor \t2, \t2, \s1 - vand \s1, \t1, \s0 - vorr \t1, \t1, \s0 - veor \t3, \t3, \s3 - veor \t0, \t0, \s1 - veor \t2, \t2, \s2 - veor \t1, \t1, \s3 - veor \t0, \t0, \s2 - vand \s0, \x7, \x3 - veor \t1, \t1, \s2 - vand \s1, \x6, \x2 - vand \s2, \x5, \x1 - vorr \s3, \x4, \x0 - veor \t3, \t3, \s0 - veor \t1, \t1, \s2 - veor \s0, \t0, \s3 - veor \t2, \t2, \s1 - vand \s2, \t3, \t1 - veor \s1, \t2, \s2 - veor \s3, \s0, \s2 - vbsl \s1, \t1, \s0 - vmvn \t0, \s0 - vbsl \s0, \s1, \s3 - vbsl \t0, \s1, \s3 - vbsl \s3, \t3, \t2 - veor \t3, \t3, \t2 - vand \s2, \s0, \s3 - veor \t1, \t1, \t0 - veor \s2, \s2, \t3 - mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - .endm - - .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 - inv_gf256 \b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \ - \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - out_bs_ch \b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3 - .endm - - .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - inv_in_bs_ch \b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7 - inv_gf256 \b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \ - \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 - inv_out_bs_ch \b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6 - .endm - - .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, mask - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \t0, \t0, \x0 - vld1.8 {\t2-\t3}, [bskey, :256]! - veor \t1, \t1, \x1 - __tbl \x0, \t0, \mask - veor \t2, \t2, \x2 - __tbl \x1, \t1, \mask - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \t3, \t3, \x3 - __tbl \x2, \t2, \mask - __tbl \x3, \t3, \mask - vld1.8 {\t2-\t3}, [bskey, :256]! 
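A note on the representation these macros operate on: after bitslicing, register q_k holds bit k of every byte of all eight blocks, so each veor/vand above acts on 128 bytes' worth of one bit position at once, and the S-box reduces to the GF(2^8) inversion circuit of inv_gf256 with no data-dependent table lookups. A toy byte-wide version of the transposition (the NEON code does the equivalent on 128-bit registers using the swapmove networks further down):

    /* Gather bit k of eight input bytes into output byte-plane k. */
    static void bitslice8_sketch(unsigned char plane[8],
                                 const unsigned char in[8])
    {
        for (int k = 0; k < 8; k++) {
            unsigned char p = 0;
            for (int j = 0; j < 8; j++)
                p |= (unsigned char)(((in[j] >> k) & 1) << j);
            plane[k] = p;
        }
    }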
- veor \t0, \t0, \x4 - veor \t1, \t1, \x5 - __tbl \x4, \t0, \mask - veor \t2, \t2, \x6 - __tbl \x5, \t1, \mask - veor \t3, \t3, \x7 - __tbl \x6, \t2, \mask - __tbl \x7, \t3, \mask - .endm - - .macro inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, mask - __tbl \x0, \x0, \mask, \t0 - __tbl \x1, \x1, \mask, \t1 - __tbl \x2, \x2, \mask, \t2 - __tbl \x3, \x3, \mask, \t3 - __tbl \x4, \x4, \mask, \t0 - __tbl \x5, \x5, \mask, \t1 - __tbl \x6, \x6, \mask, \t2 - __tbl \x7, \x7, \mask, \t3 - .endm - - .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7, inv - vext.8 \t0, \x0, \x0, #12 - vext.8 \t1, \x1, \x1, #12 - veor \x0, \x0, \t0 - vext.8 \t2, \x2, \x2, #12 - veor \x1, \x1, \t1 - vext.8 \t3, \x3, \x3, #12 - veor \x2, \x2, \t2 - vext.8 \t4, \x4, \x4, #12 - veor \x3, \x3, \t3 - vext.8 \t5, \x5, \x5, #12 - veor \x4, \x4, \t4 - vext.8 \t6, \x6, \x6, #12 - veor \x5, \x5, \t5 - vext.8 \t7, \x7, \x7, #12 - veor \x6, \x6, \t6 - veor \t1, \t1, \x0 - veor.8 \x7, \x7, \t7 - vext.8 \x0, \x0, \x0, #8 - veor \t2, \t2, \x1 - veor \t0, \t0, \x7 - veor \t1, \t1, \x7 - vext.8 \x1, \x1, \x1, #8 - veor \t5, \t5, \x4 - veor \x0, \x0, \t0 - veor \t6, \t6, \x5 - veor \x1, \x1, \t1 - vext.8 \t0, \x4, \x4, #8 - veor \t4, \t4, \x3 - vext.8 \t1, \x5, \x5, #8 - veor \t7, \t7, \x6 - vext.8 \x4, \x3, \x3, #8 - veor \t3, \t3, \x2 - vext.8 \x5, \x7, \x7, #8 - veor \t4, \t4, \x7 - vext.8 \x3, \x6, \x6, #8 - veor \t3, \t3, \x7 - vext.8 \x6, \x2, \x2, #8 - veor \x7, \t1, \t5 - .ifb \inv - veor \x2, \t0, \t4 - veor \x4, \x4, \t3 - veor \x5, \x5, \t7 - veor \x3, \x3, \t6 - veor \x6, \x6, \t2 - .else - veor \t3, \t3, \x4 - veor \x5, \x5, \t7 - veor \x2, \x3, \t6 - veor \x3, \t0, \t4 - veor \x4, \x6, \t2 - vmov \x6, \t3 - .endif - .endm - - .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7 - vld1.8 {\t0-\t1}, [bskey, :256]! - veor \x0, \x0, \t0 - vld1.8 {\t2-\t3}, [bskey, :256]! - veor \x1, \x1, \t1 - vld1.8 {\t4-\t5}, [bskey, :256]! 
- veor \x2, \x2, \t2 - vld1.8 {\t6-\t7}, [bskey, :256] - sub bskey, bskey, #224 - veor \x3, \x3, \t3 - veor \x4, \x4, \t4 - veor \x5, \x5, \t5 - veor \x6, \x6, \t6 - veor \x7, \x7, \t7 - vext.8 \t0, \x0, \x0, #8 - vext.8 \t6, \x6, \x6, #8 - vext.8 \t7, \x7, \x7, #8 - veor \t0, \t0, \x0 - vext.8 \t1, \x1, \x1, #8 - veor \t6, \t6, \x6 - vext.8 \t2, \x2, \x2, #8 - veor \t7, \t7, \x7 - vext.8 \t3, \x3, \x3, #8 - veor \t1, \t1, \x1 - vext.8 \t4, \x4, \x4, #8 - veor \t2, \t2, \x2 - vext.8 \t5, \x5, \x5, #8 - veor \t3, \t3, \x3 - veor \t4, \t4, \x4 - veor \t5, \t5, \x5 - veor \x0, \x0, \t6 - veor \x1, \x1, \t6 - veor \x2, \x2, \t0 - veor \x4, \x4, \t2 - veor \x3, \x3, \t1 - veor \x1, \x1, \t7 - veor \x2, \x2, \t7 - veor \x4, \x4, \t6 - veor \x5, \x5, \t3 - veor \x3, \x3, \t6 - veor \x6, \x6, \t4 - veor \x4, \x4, \t7 - veor \x5, \x5, \t7 - veor \x7, \x7, \t5 - mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 - .endm - - .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 - vshr.u64 \t0, \b0, #\n - vshr.u64 \t1, \b1, #\n - veor \t0, \t0, \a0 - veor \t1, \t1, \a1 - vand \t0, \t0, \mask - vand \t1, \t1, \mask - veor \a0, \a0, \t0 - vshl.s64 \t0, \t0, #\n - veor \a1, \a1, \t1 - vshl.s64 \t1, \t1, #\n - veor \b0, \b0, \t0 - veor \b1, \b1, \t1 - .endm - - .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 - vmov.i8 \t0, #0x55 - vmov.i8 \t1, #0x33 - swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 - swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 - vmov.i8 \t0, #0x0f - swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 - swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 - swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 - swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 - .endm - - .align 4 -M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d - - /* - * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) - */ -ENTRY(aesbs_convert_key) - vld1.32 {q7}, [r1]! // load round 0 key - vld1.32 {q15}, [r1]! // load round 1 key - - vmov.i8 q8, #0x01 // bit masks - vmov.i8 q9, #0x02 - vmov.i8 q10, #0x04 - vmov.i8 q11, #0x08 - vmov.i8 q12, #0x10 - vmov.i8 q13, #0x20 - __ldr q14, M0 - - sub r2, r2, #1 - vst1.8 {q7}, [r0, :128]! // save round 0 key - -.Lkey_loop: - __tbl q7, q15, q14 - vmov.i8 q6, #0x40 - vmov.i8 q15, #0x80 - - vtst.8 q0, q7, q8 - vtst.8 q1, q7, q9 - vtst.8 q2, q7, q10 - vtst.8 q3, q7, q11 - vtst.8 q4, q7, q12 - vtst.8 q5, q7, q13 - vtst.8 q6, q7, q6 - vtst.8 q7, q7, q15 - vld1.32 {q15}, [r1]! // load next round key - vmvn q0, q0 - vmvn q1, q1 - vmvn q5, q5 - vmvn q6, q6 - - subs r2, r2, #1 - vst1.8 {q0-q1}, [r0, :256]! - vst1.8 {q2-q3}, [r0, :256]! - vst1.8 {q4-q5}, [r0, :256]! - vst1.8 {q6-q7}, [r0, :256]! - bne .Lkey_loop - - vmov.i8 q7, #0x63 // compose .L63 - veor q15, q15, q7 - vst1.8 {q15}, [r0, :128] - bx lr -ENDPROC(aesbs_convert_key) - - .align 4 -M0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 - -aesbs_encrypt8: - vld1.8 {q9}, [bskey, :128]! 
// round 0 key - __ldr q8, M0SR - - veor q10, q0, q9 // xor with round0 key - veor q11, q1, q9 - __tbl q0, q10, q8 - veor q12, q2, q9 - __tbl q1, q11, q8 - veor q13, q3, q9 - __tbl q2, q12, q8 - veor q14, q4, q9 - __tbl q3, q13, q8 - veor q15, q5, q9 - __tbl q4, q14, q8 - veor q10, q6, q9 - __tbl q5, q15, q8 - veor q11, q7, q9 - __tbl q6, q10, q8 - __tbl q7, q11, q8 - - bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 - - sub rounds, rounds, #1 - b .Lenc_sbox - - .align 5 -SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -SRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d - -.Lenc_last: - __ldr q12, SRM0 -.Lenc_loop: - shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 -.Lenc_sbox: - sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ - q13, q14, q15 - subs rounds, rounds, #1 - bcc .Lenc_done - - mix_cols q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \ - q13, q14, q15 - - beq .Lenc_last - __ldr q12, SR - b .Lenc_loop - -.Lenc_done: - vld1.8 {q12}, [bskey, :128] // last round key - - bitslice q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11 - - veor q0, q0, q12 - veor q1, q1, q12 - veor q4, q4, q12 - veor q6, q6, q12 - veor q3, q3, q12 - veor q7, q7, q12 - veor q2, q2, q12 - veor q5, q5, q12 - bx lr -ENDPROC(aesbs_encrypt8) - - .align 4 -M0ISR: .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 - -aesbs_decrypt8: - add bskey, bskey, rounds, lsl #7 - sub bskey, bskey, #112 - vld1.8 {q9}, [bskey, :128] // round 0 key - sub bskey, bskey, #128 - __ldr q8, M0ISR - - veor q10, q0, q9 // xor with round0 key - veor q11, q1, q9 - __tbl q0, q10, q8 - veor q12, q2, q9 - __tbl q1, q11, q8 - veor q13, q3, q9 - __tbl q2, q12, q8 - veor q14, q4, q9 - __tbl q3, q13, q8 - veor q15, q5, q9 - __tbl q4, q14, q8 - veor q10, q6, q9 - __tbl q5, q15, q8 - veor q11, q7, q9 - __tbl q6, q10, q8 - __tbl q7, q11, q8 - - bitslice q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11 - - sub rounds, rounds, #1 - b .Ldec_sbox - - .align 5 -ISR: .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -ISRM0: .quad 0x01040b0e0205080f, 0x0306090c00070a0d - -.Ldec_last: - __ldr q12, ISRM0 -.Ldec_loop: - inv_shift_rows q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12 -.Ldec_sbox: - inv_sbox q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \ - q13, q14, q15 - subs rounds, rounds, #1 - bcc .Ldec_done - - inv_mix_cols q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \ - q13, q14, q15 - - beq .Ldec_last - __ldr q12, ISR - b .Ldec_loop - -.Ldec_done: - add bskey, bskey, #112 - vld1.8 {q12}, [bskey, :128] // last round key - - bitslice q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11 - - veor q0, q0, q12 - veor q1, q1, q12 - veor q6, q6, q12 - veor q4, q4, q12 - veor q2, q2, q12 - veor q7, q7, q12 - veor q3, q3, q12 - veor q5, q5, q12 - bx lr -ENDPROC(aesbs_decrypt8) - - /* - * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - push {r4-r6, lr} - ldr r5, [sp, #16] // number of blocks - -99: __adr ip, 0f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [r1]! - vld1.8 {q1}, [r1]! - vld1.8 {q2}, [r1]! - vld1.8 {q3}, [r1]! - vld1.8 {q4}, [r1]! - vld1.8 {q5}, [r1]! - vld1.8 {q6}, [r1]! - vld1.8 {q7}, [r1]! 
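The `sub ip, ip, lr, lsl #2` / `bxlt ip` pairs in __ecb_crypt compute a branch target inside the run of eight 4-byte load (and, later, store) instructions, so a partial batch of n = blocks & 7 executes only the last n of them; input slot i always pairs with the matching output register, so the untouched lanes merely carry garbage through the cipher. In C, the same shape is a switch that falls into the tail of the run (names illustrative):

    #include <stdint.h>
    #include <string.h>

    /* Perform only the last n of 8 block loads, as the computed branch does. */
    static void load_tail_sketch(uint8_t blk[8][16], const uint8_t **in,
                                 unsigned n)
    {
        switch (8 - n) {                 /* skip the first 8 - n loads */
        case 0: memcpy(blk[0], *in, 16); *in += 16; /* fall through */
        case 1: memcpy(blk[1], *in, 16); *in += 16; /* fall through */
        case 2: memcpy(blk[2], *in, 16); *in += 16; /* fall through */
        case 3: memcpy(blk[3], *in, 16); *in += 16; /* fall through */
        case 4: memcpy(blk[4], *in, 16); *in += 16; /* fall through */
        case 5: memcpy(blk[5], *in, 16); *in += 16; /* fall through */
        case 6: memcpy(blk[6], *in, 16); *in += 16; /* fall through */
        case 7: memcpy(blk[7], *in, 16); *in += 16;
        }
    }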
- -0: mov bskey, r2 - mov rounds, r3 - bl \do8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vst1.8 {\o0}, [r0]! - vst1.8 {\o1}, [r0]! - vst1.8 {\o2}, [r0]! - vst1.8 {\o3}, [r0]! - vst1.8 {\o4}, [r0]! - vst1.8 {\o5}, [r0]! - vst1.8 {\o6}, [r0]! - vst1.8 {\o7}, [r0]! - -1: subs r5, r5, #8 - bgt 99b - - pop {r4-r6, pc} - .endm - - .align 4 -ENTRY(aesbs_ecb_encrypt) - __ecb_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 -ENDPROC(aesbs_ecb_encrypt) - - .align 4 -ENTRY(aesbs_ecb_decrypt) - __ecb_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 -ENDPROC(aesbs_ecb_decrypt) - - /* - * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[]) - */ - .align 4 -ENTRY(aesbs_cbc_decrypt) - mov ip, sp - push {r4-r6, lr} - ldm ip, {r5-r6} // load args 4-5 - -99: __adr ip, 0f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - mov lr, r1 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [lr]! - vld1.8 {q1}, [lr]! - vld1.8 {q2}, [lr]! - vld1.8 {q3}, [lr]! - vld1.8 {q4}, [lr]! - vld1.8 {q5}, [lr]! - vld1.8 {q6}, [lr]! - vld1.8 {q7}, [lr] - -0: mov bskey, r2 - mov rounds, r3 - bl aesbs_decrypt8 - - vld1.8 {q8}, [r6] - vmov q9, q8 - vmov q10, q8 - vmov q11, q8 - vmov q12, q8 - vmov q13, q8 - vmov q14, q8 - vmov q15, q8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q9}, [r1]! - vld1.8 {q10}, [r1]! - vld1.8 {q11}, [r1]! - vld1.8 {q12}, [r1]! - vld1.8 {q13}, [r1]! - vld1.8 {q14}, [r1]! - vld1.8 {q15}, [r1]! - W(nop) - -1: __adr ip, 2f - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor q0, q0, q8 - vst1.8 {q0}, [r0]! - veor q1, q1, q9 - vst1.8 {q1}, [r0]! - veor q6, q6, q10 - vst1.8 {q6}, [r0]! - veor q4, q4, q11 - vst1.8 {q4}, [r0]! - veor q2, q2, q12 - vst1.8 {q2}, [r0]! - veor q7, q7, q13 - vst1.8 {q7}, [r0]! - veor q3, q3, q14 - vst1.8 {q3}, [r0]! - veor q5, q5, q15 - vld1.8 {q8}, [r1]! // load next round's iv -2: vst1.8 {q5}, [r0]! - - subs r5, r5, #8 - vst1.8 {q8}, [r6] // store next round's iv - bgt 99b - - pop {r4-r6, pc} -ENDPROC(aesbs_cbc_decrypt) - - .macro next_ctr, q - vmov.32 \q\()h[1], r10 - adds r10, r10, #1 - vmov.32 \q\()h[0], r9 - adcs r9, r9, #0 - vmov.32 \q\()l[1], r8 - adcs r8, r8, #0 - vmov.32 \q\()l[0], r7 - adc r7, r7, #0 - vrev32.8 \q, \q - .endm - - /* - * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 ctr[], u8 final[]) - */ -ENTRY(aesbs_ctr_encrypt) - mov ip, sp - push {r4-r10, lr} - - ldm ip, {r5-r7} // load args 4-6 - teq r7, #0 - addne r5, r5, #1 // one extra block if final != 0 - - vld1.8 {q0}, [r6] // load counter - vrev32.8 q1, q0 - vmov r9, r10, d3 - vmov r7, r8, d2 - - adds r10, r10, #1 - adcs r9, r9, #0 - adcs r8, r8, #0 - adc r7, r7, #0 - -99: vmov q1, q0 - vmov q2, q0 - vmov q3, q0 - vmov q4, q0 - vmov q5, q0 - vmov q6, q0 - vmov q7, q0 - - __adr ip, 0f - sub lr, r5, #1 - and lr, lr, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #5 - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - next_ctr q1 - next_ctr q2 - next_ctr q3 - next_ctr q4 - next_ctr q5 - next_ctr q6 - next_ctr q7 - -0: mov bskey, r2 - mov rounds, r3 - bl aesbs_encrypt8 - - __adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - movgt r4, #0 - ldrle r4, [sp, #40] // load final in the last round - sub ip, ip, lr, lsl #2 - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q8}, [r1]! - vld1.8 {q9}, [r1]! - vld1.8 {q10}, [r1]! 
- vld1.8 {q11}, [r1]! - vld1.8 {q12}, [r1]! - vld1.8 {q13}, [r1]! - vld1.8 {q14}, [r1]! - teq r4, #0 // skip last block if 'final' -1: bne 2f - vld1.8 {q15}, [r1]! - -2: __adr ip, 3f - cmp r5, #8 - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor q0, q0, q8 - vst1.8 {q0}, [r0]! - veor q1, q1, q9 - vst1.8 {q1}, [r0]! - veor q4, q4, q10 - vst1.8 {q4}, [r0]! - veor q6, q6, q11 - vst1.8 {q6}, [r0]! - veor q3, q3, q12 - vst1.8 {q3}, [r0]! - veor q7, q7, q13 - vst1.8 {q7}, [r0]! - veor q2, q2, q14 - vst1.8 {q2}, [r0]! - teq r4, #0 // skip last block if 'final' - W(bne) 5f -3: veor q5, q5, q15 - vst1.8 {q5}, [r0]! - -4: next_ctr q0 - - subs r5, r5, #8 - bgt 99b - - vst1.8 {q0}, [r6] - pop {r4-r10, pc} - -5: vst1.8 {q5}, [r4] - b 4b -ENDPROC(aesbs_ctr_encrypt) - - .macro next_tweak, out, in, const, tmp - vshr.s64 \tmp, \in, #63 - vand \tmp, \tmp, \const - vadd.u64 \out, \in, \in - vext.8 \tmp, \tmp, \tmp, #8 - veor \out, \out, \tmp - .endm - - /* - * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int reorder_last_tweak) - * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[], int reorder_last_tweak) - */ -__xts_prepare8: - vld1.8 {q14}, [r7] // load iv - vmov.i32 d30, #0x87 // compose tweak mask vector - vmovl.u32 q15, d30 - vshr.u64 d30, d31, #7 - vmov q12, q14 - - __adr ip, 0f - and r4, r6, #7 - cmp r6, #8 - sub ip, ip, r4, lsl #5 - mov r4, sp - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q0}, [r1]! - next_tweak q12, q14, q15, q13 - veor q0, q0, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q1}, [r1]! - next_tweak q14, q12, q15, q13 - veor q1, q1, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q2}, [r1]! - next_tweak q12, q14, q15, q13 - veor q2, q2, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q3}, [r1]! - next_tweak q14, q12, q15, q13 - veor q3, q3, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q4}, [r1]! - next_tweak q12, q14, q15, q13 - veor q4, q4, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q5}, [r1]! - next_tweak q14, q12, q15, q13 - veor q5, q5, q12 - vst1.8 {q12}, [r4, :128]! - - vld1.8 {q6}, [r1]! - next_tweak q12, q14, q15, q13 - veor q6, q6, q14 - vst1.8 {q14}, [r4, :128]! - - vld1.8 {q7}, [r1]! - next_tweak q14, q12, q15, q13 -THUMB( itt le ) - W(cmple) r8, #0 - ble 1f -0: veor q7, q7, q12 - vst1.8 {q12}, [r4, :128] - - vst1.8 {q14}, [r7] // store next iv - bx lr - -1: vswp q12, q14 - b 0b -ENDPROC(__xts_prepare8) - - .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - push {r4-r8, lr} - mov r5, sp // preserve sp - ldrd r6, r7, [sp, #24] // get blocks and iv args - ldr r8, [sp, #32] // reorder final tweak? - rsb r8, r8, #1 - sub ip, sp, #128 // make room for 8x tweak - bic ip, ip, #0xf // align sp to 16 bytes - mov sp, ip - -99: bl __xts_prepare8 - - mov bskey, r2 - mov rounds, r3 - bl \do8 - - __adr ip, 0f - and lr, r6, #7 - cmp r6, #8 - sub ip, ip, lr, lsl #2 - mov r4, sp - bxlt ip // computed goto if blocks < 8 - - vld1.8 {q8}, [r4, :128]! - vld1.8 {q9}, [r4, :128]! - vld1.8 {q10}, [r4, :128]! - vld1.8 {q11}, [r4, :128]! - vld1.8 {q12}, [r4, :128]! - vld1.8 {q13}, [r4, :128]! - vld1.8 {q14}, [r4, :128]! - vld1.8 {q15}, [r4, :128] - -0: __adr ip, 1f - sub ip, ip, lr, lsl #3 - bxlt ip // computed goto if blocks < 8 - - veor \o0, \o0, q8 - vst1.8 {\o0}, [r0]! - veor \o1, \o1, q9 - vst1.8 {\o1}, [r0]! - veor \o2, \o2, q10 - vst1.8 {\o2}, [r0]! - veor \o3, \o3, q11 - vst1.8 {\o3}, [r0]! - veor \o4, \o4, q12 - vst1.8 {\o4}, [r0]! 
- veor \o5, \o5, q13 - vst1.8 {\o5}, [r0]! - veor \o6, \o6, q14 - vst1.8 {\o6}, [r0]! - veor \o7, \o7, q15 - vst1.8 {\o7}, [r0]! - -1: subs r6, r6, #8 - bgt 99b - - mov sp, r5 - pop {r4-r8, pc} - .endm - -ENTRY(aesbs_xts_encrypt) - __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 -ENDPROC(aesbs_xts_encrypt) - -ENTRY(aesbs_xts_decrypt) - __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 -ENDPROC(aesbs_xts_decrypt) diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S deleted file mode 100644 index eb22926d49127e894a060afc003871d743dfe36d..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/chacha-neon-core.S +++ /dev/null @@ -1,560 +0,0 @@ -/* - * ChaCha/XChaCha NEON helper functions - * - * Copyright (C) 2016 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - - /* - * NEON doesn't have a rotate instruction. The alternatives are, more or less: - * - * (a) vshl.u32 + vsri.u32 (needs temporary register) - * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register) - * (c) vrev32.16 (16-bit rotations only) - * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only, - * needs index vector) - * - * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations, - * the only choices are (a) and (b). We use (a) since it takes two-thirds the - * cycles of (b) on both Cortex-A7 and Cortex-A53. - * - * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest - * and doesn't need a temporary register. - * - * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence - * is twice as fast as (a), even when doing (a) on multiple registers - * simultaneously to eliminate the stall between vshl and vsri. Also, it - * parallelizes better when temporary registers are scarce. - * - * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as - * (a), so the need to load the rotation table actually makes the vtbl method - * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it - * seems to be a good compromise to get a more significant speed boost on some - * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7. - */ - -#include - - .text - .fpu neon - .align 5 - -/* - * chacha_permute - permute one block - * - * Permute one 64-byte block where the state matrix is stored in the four NEON - * registers q0-q3. It performs matrix operations on four words in parallel, - * but requires shuffling to rearrange the words after each round. - * - * The round count is given in r3. 
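[Editorial note, not part of the deleted file: the comment above weighs the NEON rotate strategies for ChaCha's 16, 12, 8 and 7-bit rotations. As a cross-check for those sequences (vrev32.16 for 16, vshl+vsri for 12 and 7, vtbl for 8), here is a minimal scalar C sketch of the quarter round and double round that chacha_permute vectorizes one state row per instruction; the helper names are ours, not from this source.]

#include <stdint.h>

/* rotate left; n is always one of 16, 12, 8, 7 for ChaCha */
static inline uint32_t rol32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
{
	x[a] += x[b]; x[d] = rol32(x[d] ^ x[a], 16);
	x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 12);
	x[a] += x[b]; x[d] = rol32(x[d] ^ x[a], 8);
	x[c] += x[d]; x[b] = rol32(x[b] ^ x[c], 7);
}

/* one double round: a column round followed by a diagonal round */
static void double_round(uint32_t x[16])
{
	quarter_round(x, 0, 4,  8, 12);
	quarter_round(x, 1, 5,  9, 13);
	quarter_round(x, 2, 6, 10, 14);
	quarter_round(x, 3, 7, 11, 15);
	quarter_round(x, 0, 5, 10, 15);
	quarter_round(x, 1, 6, 11, 12);
	quarter_round(x, 2, 7,  8, 13);
	quarter_round(x, 3, 4,  9, 14);
}

[The vext.8 shuffles in the assembly stand in for the diagonal indexing: rotating rows 1-3 by one, two and three words turns the column round into the diagonal round on the same register layout.]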
- * - * Clobbers: r3, ip, q4-q5 - */ -chacha_permute: - - adr ip, .Lrol8_table - vld1.8 {d10}, [ip, :64] - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vrev32.16 q3, q3 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vtbl.8 d6, {d6}, d10 - vtbl.8 d7, {d7}, d10 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - vext.8 q1, q1, q1, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - vext.8 q3, q3, q3, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vrev32.16 q3, q3 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #12 - vsri.u32 q1, q4, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - vadd.i32 q0, q0, q1 - veor q3, q3, q0 - vtbl.8 d6, {d6}, d10 - vtbl.8 d7, {d7}, d10 - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - vadd.i32 q2, q2, q3 - veor q4, q1, q2 - vshl.u32 q1, q4, #7 - vsri.u32 q1, q4, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - vext.8 q1, q1, q1, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - vext.8 q2, q2, q2, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - vext.8 q3, q3, q3, #4 - - subs r3, r3, #2 - bne .Ldoubleround - - bx lr -ENDPROC(chacha_permute) - -ENTRY(chacha_block_xor_neon) - // r0: Input state matrix, s - // r1: 1 data block output, o - // r2: 1 data block input, i - // r3: nrounds - push {lr} - - // x0..3 = s0..3 - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - vmov q8, q0 - vmov q9, q1 - vmov q10, q2 - vmov q11, q3 - - bl chacha_permute - - add ip, r2, #0x20 - vld1.8 {q4-q5}, [r2] - vld1.8 {q6-q7}, [ip] - - // o0 = i0 ^ (x0 + s0) - vadd.i32 q0, q0, q8 - veor q0, q0, q4 - - // o1 = i1 ^ (x1 + s1) - vadd.i32 q1, q1, q9 - veor q1, q1, q5 - - // o2 = i2 ^ (x2 + s2) - vadd.i32 q2, q2, q10 - veor q2, q2, q6 - - // o3 = i3 ^ (x3 + s3) - vadd.i32 q3, q3, q11 - veor q3, q3, q7 - - add ip, r1, #0x20 - vst1.8 {q0-q1}, [r1] - vst1.8 {q2-q3}, [ip] - - pop {pc} -ENDPROC(chacha_block_xor_neon) - -ENTRY(hchacha_block_neon) - // r0: Input state matrix, s - // r1: output (8 32-bit words) - // r2: nrounds - push {lr} - - vld1.32 {q0-q1}, [r0]! - vld1.32 {q2-q3}, [r0] - - mov r3, r2 - bl chacha_permute - - vst1.32 {q0}, [r1]! - vst1.32 {q3}, [r1] - - pop {pc} -ENDPROC(hchacha_block_neon) - - .align 4 -.Lctrinc: .word 0, 1, 2, 3 -.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6 - - .align 5 -ENTRY(chacha_4block_xor_neon) - push {r4-r5} - mov r4, sp // preserve the stack pointer - sub ip, sp, #0x20 // allocate a 32 byte buffer - bic ip, ip, #0x1f // aligned to 32 bytes - mov sp, ip - - // r0: Input state matrix, s - // r1: 4 data blocks output, o - // r2: 4 data blocks input, i - // r3: nrounds - - // - // This function encrypts four consecutive ChaCha blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. The words are re-interleaved before the - // final addition of the original state and the XORing step. 
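[Editorial note, not part of the deleted file: the comment just above describes a transposed, "word-sliced" layout for the four-block path. A hedged C sketch of that layout (our own illustration, reusing rol32 from the sketch above): after the vdup.32 broadcasts, NEON register i holds state word i of all four blocks, so one vector add or rotate advances four blocks at once, and only the per-block counters in x12 differ.]

typedef struct { uint32_t lane[4]; } u32x4;	/* stands in for one q register */

/* x[i].lane[n] is state word i of block n: rounds need no shuffling */
static void quarter_round_x4(u32x4 x[16], int a, int b, int c, int d)
{
	for (int n = 0; n < 4; n++) {
		x[a].lane[n] += x[b].lane[n];
		x[d].lane[n] = rol32(x[d].lane[n] ^ x[a].lane[n], 16);
		x[c].lane[n] += x[d].lane[n];
		x[b].lane[n] = rol32(x[b].lane[n] ^ x[c].lane[n], 12);
		x[a].lane[n] += x[b].lane[n];
		x[d].lane[n] = rol32(x[d].lane[n] ^ x[a].lane[n], 8);
		x[c].lane[n] += x[d].lane[n];
		x[b].lane[n] = rol32(x[b].lane[n] ^ x[c].lane[n], 7);
	}
}

[The vzip/vswp sequences near the end of the function undo this transposition, so the keystream can be XORed against the input in normal block order.]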
- // - - // x0..15[0-3] = s0..15[0-3] - add ip, r0, #0x20 - vld1.32 {q0-q1}, [r0] - vld1.32 {q2-q3}, [ip] - - adr r5, .Lctrinc - vdup.32 q15, d7[1] - vdup.32 q14, d7[0] - vld1.32 {q4}, [r5, :128] - vdup.32 q13, d6[1] - vdup.32 q12, d6[0] - vdup.32 q11, d5[1] - vdup.32 q10, d5[0] - vadd.u32 q12, q12, q4 // x12 += counter values 0-3 - vdup.32 q9, d4[1] - vdup.32 q8, d4[0] - vdup.32 q7, d3[1] - vdup.32 q6, d3[0] - vdup.32 q5, d2[1] - vdup.32 q4, d2[0] - vdup.32 q3, d1[1] - vdup.32 q2, d1[0] - vdup.32 q1, d0[1] - vdup.32 q0, d0[0] - - adr ip, .Lrol8_table - b 1f - -.Ldoubleround4: - vld1.32 {q8-q9}, [sp, :256] -1: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - vrev32.16 q15, q15 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #12 - vshl.u32 q5, q9, #12 - vsri.u32 q4, q8, #20 - vsri.u32 q5, q9, #20 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #12 - vshl.u32 q7, q9, #12 - vsri.u32 q6, q8, #20 - vsri.u32 q7, q9, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - vld1.8 {d16}, [ip, :64] - vadd.i32 q0, q0, q4 - vadd.i32 q1, q1, q5 - vadd.i32 q2, q2, q6 - vadd.i32 q3, q3, q7 - - veor q12, q12, q0 - veor q13, q13, q1 - veor q14, q14, q2 - veor q15, q15, q3 - - vtbl.8 d24, {d24}, d16 - vtbl.8 d25, {d25}, d16 - vtbl.8 d26, {d26}, d16 - vtbl.8 d27, {d27}, d16 - vtbl.8 d28, {d28}, d16 - vtbl.8 d29, {d29}, d16 - vtbl.8 d30, {d30}, d16 - vtbl.8 d31, {d31}, d16 - - vld1.32 {q8-q9}, [sp, :256] - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - vadd.i32 q8, q8, q12 - vadd.i32 q9, q9, q13 - vadd.i32 q10, q10, q14 - vadd.i32 q11, q11, q15 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q4, q8 - veor q9, q5, q9 - vshl.u32 q4, q8, #7 - vshl.u32 q5, q9, #7 - vsri.u32 q4, q8, #25 - vsri.u32 q5, q9, #25 - - veor q8, q6, q10 - veor q9, q7, q11 - vshl.u32 q6, q8, #7 - vshl.u32 q7, q9, #7 - vsri.u32 q6, q8, #25 - vsri.u32 q7, q9, #25 - - vld1.32 {q8-q9}, [sp, :256] - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vrev32.16 q15, q15 - vrev32.16 q12, q12 - vrev32.16 q13, q13 - vrev32.16 q14, q14 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 
q7, q8, #12 - vshl.u32 q4, q9, #12 - vsri.u32 q7, q8, #20 - vsri.u32 q4, q9, #20 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #12 - vshl.u32 q6, q9, #12 - vsri.u32 q5, q8, #20 - vsri.u32 q6, q9, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - vld1.8 {d16}, [ip, :64] - vadd.i32 q0, q0, q5 - vadd.i32 q1, q1, q6 - vadd.i32 q2, q2, q7 - vadd.i32 q3, q3, q4 - - veor q15, q15, q0 - veor q12, q12, q1 - veor q13, q13, q2 - veor q14, q14, q3 - - vtbl.8 d30, {d30}, d16 - vtbl.8 d31, {d31}, d16 - vtbl.8 d24, {d24}, d16 - vtbl.8 d25, {d25}, d16 - vtbl.8 d26, {d26}, d16 - vtbl.8 d27, {d27}, d16 - vtbl.8 d28, {d28}, d16 - vtbl.8 d29, {d29}, d16 - - vld1.32 {q8-q9}, [sp, :256] - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - vadd.i32 q10, q10, q15 - vadd.i32 q11, q11, q12 - vadd.i32 q8, q8, q13 - vadd.i32 q9, q9, q14 - - vst1.32 {q8-q9}, [sp, :256] - - veor q8, q7, q8 - veor q9, q4, q9 - vshl.u32 q7, q8, #7 - vshl.u32 q4, q9, #7 - vsri.u32 q7, q8, #25 - vsri.u32 q4, q9, #25 - - veor q8, q5, q10 - veor q9, q6, q11 - vshl.u32 q5, q8, #7 - vshl.u32 q6, q9, #7 - vsri.u32 q5, q8, #25 - vsri.u32 q6, q9, #25 - - subs r3, r3, #2 - bne .Ldoubleround4 - - // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15. - // x8..9[0-3] are on the stack. - - // Re-interleave the words in the first two rows of each block (x0..7). - // Also add the counter values 0-3 to x12[0-3]. - vld1.32 {q8}, [r5, :128] // load counter values 0-3 - vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) - vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) - vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) - vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7) - vadd.u32 q12, q8 // x12 += counter values 0-3 - vswp d1, d4 - vswp d3, d6 - vld1.32 {q8-q9}, [r0]! // load s0..7 - vswp d9, d12 - vswp d11, d14 - - // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1) - // after XORing the first 32 bytes. - vswp q1, q4 - - // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7) - - // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block) - vadd.u32 q0, q0, q8 - vadd.u32 q2, q2, q8 - vadd.u32 q4, q4, q8 - vadd.u32 q3, q3, q8 - - // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block) - vadd.u32 q1, q1, q9 - vadd.u32 q6, q6, q9 - vadd.u32 q5, q5, q9 - vadd.u32 q7, q7, q9 - - // XOR first 32 bytes using keystream from first two rows of first block - vld1.8 {q8-q9}, [r2]! - veor q8, q8, q0 - veor q9, q9, q1 - vst1.8 {q8-q9}, [r1]! - - // Re-interleave the words in the last two rows of each block (x8..15). 
- vld1.32 {q8-q9}, [sp, :256] - vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) - vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) - vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) - vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11) - vld1.32 {q0-q1}, [r0] // load s8..15 - vswp d25, d28 - vswp d27, d30 - vswp d17, d20 - vswp d19, d22 - - // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15) - - // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block) - vadd.u32 q8, q8, q0 - vadd.u32 q10, q10, q0 - vadd.u32 q9, q9, q0 - vadd.u32 q11, q11, q0 - - // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block) - vadd.u32 q12, q12, q1 - vadd.u32 q14, q14, q1 - vadd.u32 q13, q13, q1 - vadd.u32 q15, q15, q1 - - // XOR the rest of the data with the keystream - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q8 - veor q1, q1, q12 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q2 - veor q1, q1, q6 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q10 - veor q1, q1, q14 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q4 - veor q1, q1, q5 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q9 - veor q1, q1, q13 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2]! - veor q0, q0, q3 - veor q1, q1, q7 - vst1.8 {q0-q1}, [r1]! - - vld1.8 {q0-q1}, [r2] - mov sp, r4 // restore original stack pointer - veor q0, q0, q11 - veor q1, q1, q15 - vst1.8 {q0-q1}, [r1] - - pop {r4-r5} - bx lr -ENDPROC(chacha_4block_xor_neon) diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S deleted file mode 100644 index 5cbd4a6fedad7cb3c99ed35295b77f554d967434..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/crc32-ce-core.S +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions - * - * Copyright (C) 2016 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 - * calculation. 
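[Editorial note, not part of the deleted file: for reference, a bitwise model of the value the folded PMULL computation must equal — our sketch, using the usual reflected bit order for the polynomial stated in the next comment line; crc32c substitutes the Castagnoli polynomial 0x82F63B78.]

#include <stddef.h>
#include <stdint.h>

/* reflected CRC32, polynomial 0xEDB88320 (bit-reversed 0x04C11DB7) */
static uint32_t crc32_le_ref(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
	}
	return crc;
}

[The PMULL code reaches the same result by folding 64-byte stripes with the precomputed x^N mod P(x) constants listed below and finishing with a Barrett reduction.]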
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) - * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found - * at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2B: Instruction Set Reference, N-Z - * - * Authors: Gregory Prestas - * Alexander Boyko - */ - -#include -#include - - .text - .align 6 - .arch armv8-a - .arch_extension crc - .fpu crypto-neon-fp-armv8 - -.Lcrc32_constants: - /* - * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 - * #define CONSTANT_R1 0x154442bd4LL - * - * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 - * #define CONSTANT_R2 0x1c6e41596LL - */ - .quad 0x0000000154442bd4 - .quad 0x00000001c6e41596 - - /* - * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 - * #define CONSTANT_R3 0x1751997d0LL - * - * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e - * #define CONSTANT_R4 0x0ccaa009eLL - */ - .quad 0x00000001751997d0 - .quad 0x00000000ccaa009e - - /* - * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 - * #define CONSTANT_R5 0x163cd6124LL - */ - .quad 0x0000000163cd6124 - .quad 0x00000000FFFFFFFF - - /* - * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL - * - * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` - * = 0x1F7011641LL - * #define CONSTANT_RU 0x1F7011641LL - */ - .quad 0x00000001DB710641 - .quad 0x00000001F7011641 - -.Lcrc32c_constants: - .quad 0x00000000740eef02 - .quad 0x000000009e4addf8 - .quad 0x00000000f20c0dfe - .quad 0x000000014cd00bd6 - .quad 0x00000000dd45aab8 - .quad 0x00000000FFFFFFFF - .quad 0x0000000105ec76f0 - .quad 0x00000000dea713f1 - - dCONSTANTl .req d0 - dCONSTANTh .req d1 - qCONSTANT .req q0 - - BUF .req r0 - LEN .req r1 - CRC .req r2 - - qzr .req q9 - - /** - * Calculate crc32 - * BUF - buffer - * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63 - * CRC - initial crc32 - * return %eax crc32 - * uint crc32_pmull_le(unsigned char const *buffer, - * size_t len, uint crc32) - */ -ENTRY(crc32_pmull_le) - adr r3, .Lcrc32_constants - b 0f - -ENTRY(crc32c_pmull_le) - adr r3, .Lcrc32c_constants - -0: bic LEN, LEN, #15 - vld1.8 {q1-q2}, [BUF, :128]! - vld1.8 {q3-q4}, [BUF, :128]! - vmov.i8 qzr, #0 - vmov.i8 qCONSTANT, #0 - vmov.32 dCONSTANTl[0], CRC - veor.8 d2, d2, dCONSTANTl - sub LEN, LEN, #0x40 - cmp LEN, #0x40 - blt less_64 - - vld1.64 {qCONSTANT}, [r3] - -loop_64: /* 64 bytes Full cache line folding */ - sub LEN, LEN, #0x40 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q6, d5, dCONSTANTh - vmull.p64 q7, d7, dCONSTANTh - vmull.p64 q8, d9, dCONSTANTh - - vmull.p64 q1, d2, dCONSTANTl - vmull.p64 q2, d4, dCONSTANTl - vmull.p64 q3, d6, dCONSTANTl - vmull.p64 q4, d8, dCONSTANTl - - veor.8 q1, q1, q5 - vld1.8 {q5}, [BUF, :128]! - veor.8 q2, q2, q6 - vld1.8 {q6}, [BUF, :128]! - veor.8 q3, q3, q7 - vld1.8 {q7}, [BUF, :128]! - veor.8 q4, q4, q8 - vld1.8 {q8}, [BUF, :128]! - - veor.8 q1, q1, q5 - veor.8 q2, q2, q6 - veor.8 q3, q3, q7 - veor.8 q4, q4, q8 - - cmp LEN, #0x40 - bge loop_64 - -less_64: /* Folding cache line into 128bit */ - vldr dCONSTANTl, [r3, #16] - vldr dCONSTANTh, [r3, #24] - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q2 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q3 - - vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q4 - - teq LEN, #0 - beq fold_64 - -loop_16: /* Folding rest buffer into 128bit */ - subs LEN, LEN, #0x10 - - vld1.8 {q2}, [BUF, :128]! 
- vmull.p64 q5, d3, dCONSTANTh - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q5 - veor.8 q1, q1, q2 - - bne loop_16 - -fold_64: - /* perform the last 64 bit fold, also adds 32 zeroes - * to the input stream */ - vmull.p64 q2, d2, dCONSTANTh - vext.8 q1, q1, qzr, #8 - veor.8 q1, q1, q2 - - /* final 32-bit fold */ - vldr dCONSTANTl, [r3, #32] - vldr d6, [r3, #40] - vmov.i8 d7, #0 - - vext.8 q2, q1, qzr, #4 - vand.8 d2, d2, d6 - vmull.p64 q1, d2, dCONSTANTl - veor.8 q1, q1, q2 - - /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ - vldr dCONSTANTl, [r3, #48] - vldr dCONSTANTh, [r3, #56] - - vand.8 q2, q1, q3 - vext.8 q2, qzr, q2, #8 - vmull.p64 q2, d5, dCONSTANTh - vand.8 q2, q2, q3 - vmull.p64 q2, d4, dCONSTANTl - veor.8 q1, q1, q2 - vmov r0, s5 - - bx lr -ENDPROC(crc32_pmull_le) -ENDPROC(crc32c_pmull_le) - - .macro __crc32, c - subs ip, r2, #8 - bmi .Ltail\c - - tst r1, #3 - bne .Lunaligned\c - - teq ip, #0 -.Laligned8\c: - ldrd r2, r3, [r1], #8 -ARM_BE8(rev r2, r2 ) -ARM_BE8(rev r3, r3 ) - crc32\c\()w r0, r0, r2 - crc32\c\()w r0, r0, r3 - bxeq lr - subs ip, ip, #8 - bpl .Laligned8\c - -.Ltail\c: - tst ip, #4 - beq 2f - ldr r3, [r1], #4 -ARM_BE8(rev r3, r3 ) - crc32\c\()w r0, r0, r3 - -2: tst ip, #2 - beq 1f - ldrh r3, [r1], #2 -ARM_BE8(rev16 r3, r3 ) - crc32\c\()h r0, r0, r3 - -1: tst ip, #1 - bxeq lr - ldrb r3, [r1] - crc32\c\()b r0, r0, r3 - bx lr - -.Lunaligned\c: - tst r1, #1 - beq 2f - ldrb r3, [r1], #1 - subs r2, r2, #1 - crc32\c\()b r0, r0, r3 - - tst r1, #2 - beq 0f -2: ldrh r3, [r1], #2 - subs r2, r2, #2 -ARM_BE8(rev16 r3, r3 ) - crc32\c\()h r0, r0, r3 - -0: subs ip, r2, #8 - bpl .Laligned8\c - b .Ltail\c - .endm - - .align 5 -ENTRY(crc32_armv8_le) - __crc32 -ENDPROC(crc32_armv8_le) - - .align 5 -ENTRY(crc32c_armv8_le) - __crc32 c -ENDPROC(crc32c_armv8_le) diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S deleted file mode 100644 index 86be258a803fa0b618f6d762d511143285b3a8fd..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ /dev/null @@ -1,381 +0,0 @@ -// -// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions -// -// Copyright (C) 2016 Linaro Ltd -// Copyright (C) 2019 Google LLC -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 2 as -// published by the Free Software Foundation. -// - -// Derived from the x86 version: -// -// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -// -// Copyright (c) 2013, Intel Corporation -// -// Authors: -// Erdinc Ozturk -// Vinodh Gopal -// James Guilford -// Tim Chen -// -// This software is available to you under a choice of one of two -// licenses. You may choose to be licensed under the terms of the GNU -// General Public License (GPL) Version 2, available from the file -// COPYING in the main directory of this source tree, or the -// OpenIB.org BSD license below: -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. 
-// -// * Neither the name of the Intel Corporation nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// -// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Reference paper titled "Fast CRC Computation for Generic -// Polynomials Using PCLMULQDQ Instruction" -// URL: http://www.intel.com/content/dam/www/public/us/en/documents -// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -// - -#include -#include - -#ifdef CONFIG_CPU_ENDIAN_BE8 -#define CPU_LE(code...) -#else -#define CPU_LE(code...) code -#endif - - .text - .arch armv7-a - .fpu crypto-neon-fp-armv8 - - init_crc .req r0 - buf .req r1 - len .req r2 - - fold_consts_ptr .req ip - - q0l .req d0 - q0h .req d1 - q1l .req d2 - q1h .req d3 - q2l .req d4 - q2h .req d5 - q3l .req d6 - q3h .req d7 - q4l .req d8 - q4h .req d9 - q5l .req d10 - q5h .req d11 - q6l .req d12 - q6h .req d13 - q7l .req d14 - q7h .req d15 - q8l .req d16 - q8h .req d17 - q9l .req d18 - q9h .req d19 - q10l .req d20 - q10h .req d21 - q11l .req d22 - q11h .req d23 - q12l .req d24 - q12h .req d25 - - FOLD_CONSTS .req q10 - FOLD_CONST_L .req q10l - FOLD_CONST_H .req q10h - - // Fold reg1, reg2 into the next 32 data bytes, storing the result back - // into reg1, reg2. - .macro fold_32_bytes, reg1, reg2 - vld1.64 {q11-q12}, [buf]! - - vmull.p64 q8, \reg1\()h, FOLD_CONST_H - vmull.p64 \reg1, \reg1\()l, FOLD_CONST_L - vmull.p64 q9, \reg2\()h, FOLD_CONST_H - vmull.p64 \reg2, \reg2\()l, FOLD_CONST_L - -CPU_LE( vrev64.8 q11, q11 ) -CPU_LE( vrev64.8 q12, q12 ) - vswp q11l, q11h - vswp q12l, q12h - - veor.8 \reg1, \reg1, q8 - veor.8 \reg2, \reg2, q9 - veor.8 \reg1, \reg1, q11 - veor.8 \reg2, \reg2, q12 - .endm - - // Fold src_reg into dst_reg, optionally loading the next fold constants - .macro fold_16_bytes, src_reg, dst_reg, load_next_consts - vmull.p64 q8, \src_reg\()l, FOLD_CONST_L - vmull.p64 \src_reg, \src_reg\()h, FOLD_CONST_H - .ifnb \load_next_consts - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - .endif - veor.8 \dst_reg, \dst_reg, q8 - veor.8 \dst_reg, \dst_reg, \src_reg - .endm - - .macro __adrl, out, sym - movw \out, #:lower16:\sym - movt \out, #:upper16:\sym - .endm - -// -// u16 crc_t10dif_pmull(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. -// -ENTRY(crc_t10dif_pmull) - - // For sizes less than 256 bytes, we can't fold 128 bytes at a time. - cmp len, #256 - blt .Lless_than_256_bytes - - __adrl fold_consts_ptr, .Lfold_across_128_bytes_consts - - // Load the first 128 data bytes. Byte swapping is necessary to make - // the bit order match the polynomial coefficient order. - vld1.64 {q0-q1}, [buf]! - vld1.64 {q2-q3}, [buf]! - vld1.64 {q4-q5}, [buf]! - vld1.64 {q6-q7}, [buf]! 
-CPU_LE( vrev64.8 q0, q0 ) -CPU_LE( vrev64.8 q1, q1 ) -CPU_LE( vrev64.8 q2, q2 ) -CPU_LE( vrev64.8 q3, q3 ) -CPU_LE( vrev64.8 q4, q4 ) -CPU_LE( vrev64.8 q5, q5 ) -CPU_LE( vrev64.8 q6, q6 ) -CPU_LE( vrev64.8 q7, q7 ) - vswp q0l, q0h - vswp q1l, q1h - vswp q2l, q2h - vswp q3l, q3h - vswp q4l, q4h - vswp q5l, q5h - vswp q6l, q6h - vswp q7l, q7h - - // XOR the first 16 data *bits* with the initial CRC value. - vmov.i8 q8h, #0 - vmov.u16 q8h[3], init_crc - veor q0h, q0h, q8h - - // Load the constants for folding across 128 bytes. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - // Subtract 128 for the 128 data bytes just consumed. Subtract another - // 128 to simplify the termination condition of the following loop. - sub len, len, #256 - - // While >= 128 data bytes remain (not counting q0-q7), fold the 128 - // bytes q0-q7 into them, storing the result back into q0-q7. -.Lfold_128_bytes_loop: - fold_32_bytes q0, q1 - fold_32_bytes q2, q3 - fold_32_bytes q4, q5 - fold_32_bytes q6, q7 - subs len, len, #128 - bge .Lfold_128_bytes_loop - - // Now fold the 112 bytes in q0-q6 into the 16 bytes in q7. - - // Fold across 64 bytes. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - fold_16_bytes q0, q4 - fold_16_bytes q1, q5 - fold_16_bytes q2, q6 - fold_16_bytes q3, q7, 1 - // Fold across 32 bytes. - fold_16_bytes q4, q6 - fold_16_bytes q5, q7, 1 - // Fold across 16 bytes. - fold_16_bytes q6, q7 - - // Add 128 to get the correct number of data bytes remaining in 0...127 - // (not counting q7), following the previous extra subtraction by 128. - // Then subtract 16 to simplify the termination condition of the - // following loop. - adds len, len, #(128-16) - - // While >= 16 data bytes remain (not counting q7), fold the 16 bytes q7 - // into them, storing the result back into q7. - blt .Lfold_16_bytes_loop_done -.Lfold_16_bytes_loop: - vmull.p64 q8, q7l, FOLD_CONST_L - vmull.p64 q7, q7h, FOLD_CONST_H - veor.8 q7, q7, q8 - vld1.64 {q0}, [buf]! -CPU_LE( vrev64.8 q0, q0 ) - vswp q0l, q0h - veor.8 q7, q7, q0 - subs len, len, #16 - bge .Lfold_16_bytes_loop - -.Lfold_16_bytes_loop_done: - // Add 16 to get the correct number of data bytes remaining in 0...15 - // (not counting q7), following the previous extra subtraction by 16. - adds len, len, #16 - beq .Lreduce_final_16_bytes - -.Lhandle_partial_segment: - // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first - // 16 bytes are in q7 and the rest are the remaining data in 'buf'. To - // do this without needing a fold constant for each possible 'len', - // redivide the bytes into a first chunk of 'len' bytes and a second - // chunk of 16 bytes, then fold the first chunk into the second. - - // q0 = last 16 original data bytes - add buf, buf, len - sub buf, buf, #16 - vld1.64 {q0}, [buf] -CPU_LE( vrev64.8 q0, q0 ) - vswp q0l, q0h - - // q1 = high order part of second chunk: q7 left-shifted by 'len' bytes. - __adrl r3, .Lbyteshift_table + 16 - sub r3, r3, len - vld1.8 {q2}, [r3] - vtbl.8 q1l, {q7l-q7h}, q2l - vtbl.8 q1h, {q7l-q7h}, q2h - - // q3 = first chunk: q7 right-shifted by '16-len' bytes. - vmov.i8 q3, #0x80 - veor.8 q2, q2, q3 - vtbl.8 q3l, {q7l-q7h}, q2l - vtbl.8 q3h, {q7l-q7h}, q2h - - // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes. - vshr.s8 q2, q2, #7 - - // q2 = second chunk: 'len' bytes from q0 (low-order bytes), - // then '16-len' bytes from q1 (high-order bytes). - vbsl.8 q2, q1, q0 - - // Fold the first chunk into the second chunk, storing the result in q7. 
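[Editorial note, not part of the deleted file, before the fold sequence below: as a point of reference for all of this folding, a scalar model of the CRC the folded path must reproduce — our sketch; CRC-T10DIF is a plain, non-reflected 16-bit CRC over the generator 0x18bb7 listed in the constants section.]

#include <stddef.h>
#include <stdint.h>

/* CRC-T10DIF: 16-bit, G(x) = x^16+x^15+x^11+x^9+x^8+x^7+x^5+x^4+x^2+x+1 */
static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= (uint16_t)(*p++) << 8;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7 : crc << 1;
	}
	return crc;
}

[Each 128-bit fold above is equivalent to advancing this loop over a whole stripe at once: multiply the running remainder by x^N mod G(x) with a carry-less multiply, then XOR in the next data.]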
- vmull.p64 q0, q3l, FOLD_CONST_L - vmull.p64 q7, q3h, FOLD_CONST_H - veor.8 q7, q7, q0 - veor.8 q7, q7, q2 - -.Lreduce_final_16_bytes: - // Reduce the 128-bit value M(x), stored in q7, to the final 16-bit CRC. - - // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - // Fold the high 64 bits into the low 64 bits, while also multiplying by - // x^64. This produces a 128-bit value congruent to x^64 * M(x) and - // whose low 48 bits are 0. - vmull.p64 q0, q7h, FOLD_CONST_H // high bits * x^48 * (x^80 mod G(x)) - veor.8 q0h, q0h, q7l // + low bits * x^64 - - // Fold the high 32 bits into the low 96 bits. This produces a 96-bit - // value congruent to x^64 * M(x) and whose low 48 bits are 0. - vmov.i8 q1, #0 - vmov s4, s3 // extract high 32 bits - vmov s3, s5 // zero high 32 bits - vmull.p64 q1, q1l, FOLD_CONST_L // high 32 bits * x^48 * (x^48 mod G(x)) - veor.8 q0, q0, q1 // + low bits - - // Load G(x) and floor(x^48 / G(x)). - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128] - - // Use Barrett reduction to compute the final CRC value. - vmull.p64 q1, q0h, FOLD_CONST_H // high 32 bits * floor(x^48 / G(x)) - vshr.u64 q1l, q1l, #32 // /= x^32 - vmull.p64 q1, q1l, FOLD_CONST_L // *= G(x) - vshr.u64 q0l, q0l, #48 - veor.8 q0l, q0l, q1l // + low 16 nonzero bits - // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of q0. - - vmov.u16 r0, q0l[0] - bx lr - -.Lless_than_256_bytes: - // Checksumming a buffer of length 16...255 bytes - - __adrl fold_consts_ptr, .Lfold_across_16_bytes_consts - - // Load the first 16 data bytes. - vld1.64 {q7}, [buf]! -CPU_LE( vrev64.8 q7, q7 ) - vswp q7l, q7h - - // XOR the first 16 data *bits* with the initial CRC value. - vmov.i8 q0h, #0 - vmov.u16 q0h[3], init_crc - veor.8 q7h, q7h, q0h - - // Load the fold-across-16-bytes constants. - vld1.64 {FOLD_CONSTS}, [fold_consts_ptr, :128]! - - cmp len, #16 - beq .Lreduce_final_16_bytes // len == 16 - subs len, len, #32 - addlt len, len, #16 - blt .Lhandle_partial_segment // 17 <= len <= 31 - b .Lfold_16_bytes_loop // 32 <= len <= 255 -ENDPROC(crc_t10dif_pmull) - - .section ".rodata", "a" - .align 4 - -// Fold constants precomputed from the polynomial 0x18bb7 -// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 -.Lfold_across_128_bytes_consts: - .quad 0x0000000000006123 // x^(8*128) mod G(x) - .quad 0x0000000000002295 // x^(8*128+64) mod G(x) -// .Lfold_across_64_bytes_consts: - .quad 0x0000000000001069 // x^(4*128) mod G(x) - .quad 0x000000000000dd31 // x^(4*128+64) mod G(x) -// .Lfold_across_32_bytes_consts: - .quad 0x000000000000857d // x^(2*128) mod G(x) - .quad 0x0000000000007acc // x^(2*128+64) mod G(x) -.Lfold_across_16_bytes_consts: - .quad 0x000000000000a010 // x^(1*128) mod G(x) - .quad 0x0000000000001faa // x^(1*128+64) mod G(x) -// .Lfinal_fold_consts: - .quad 0x1368000000000000 // x^48 * (x^48 mod G(x)) - .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x)) -// .Lbarrett_reduction_consts: - .quad 0x0000000000018bb7 // G(x) - .quad 0x00000001f65a57f8 // floor(x^48 / G(x)) - -// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - -// len] is the index vector to shift left by 'len' bytes, and is also {0x80, -// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes. 
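[Editorial note, not part of the deleted file: the same trick in C terms — our illustration; vtbl16 and the shift helpers are invented names. It rests on vtbl.8 returning zero for any index byte that falls outside the 16-byte table, and on XORing the indices with 0x80 to flip a left shift into the complementary right shift. The byteshift_table array mirrors the .Lbyteshift_table data that follows.]

#include <stdint.h>

/* emulate a 16-byte vtbl.8 lookup: out-of-range index bytes give 0 */
static void vtbl16(uint8_t out[16], const uint8_t tbl[16],
		   const uint8_t idx[16])
{
	for (int i = 0; i < 16; i++)
		out[i] = idx[i] < 16 ? tbl[idx[i]] : 0;
}

static const uint8_t byteshift_table[32] = {
	0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00,
};

/* out = in shifted toward higher indices by len bytes (len in 1..15) */
static void shift_left(uint8_t out[16], const uint8_t in[16], int len)
{
	vtbl16(out, in, &byteshift_table[16 - len]);
}

/* out = in shifted toward lower indices by 16 - len bytes */
static void shift_right(uint8_t out[16], const uint8_t in[16], int len)
{
	uint8_t idx[16];

	for (int i = 0; i < 16; i++)
		idx[i] = byteshift_table[16 - len + i] ^ 0x80;
	vtbl16(out, in, idx);
}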
-.Lbyteshift_table: - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S deleted file mode 100644 index c47fe81abcb0189cdb809a959c96dbdfe2fa74bd..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/ghash-ce-core.S +++ /dev/null @@ -1,337 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions. - * - * Copyright (C) 2015 - 2017 Linaro Ltd. - */ - -#include -#include - - SHASH .req q0 - T1 .req q1 - XL .req q2 - XM .req q3 - XH .req q4 - IN1 .req q4 - - SHASH_L .req d0 - SHASH_H .req d1 - T1_L .req d2 - T1_H .req d3 - XL_L .req d4 - XL_H .req d5 - XM_L .req d6 - XM_H .req d7 - XH_L .req d8 - - t0l .req d10 - t0h .req d11 - t1l .req d12 - t1h .req d13 - t2l .req d14 - t2h .req d15 - t3l .req d16 - t3h .req d17 - t4l .req d18 - t4h .req d19 - - t0q .req q5 - t1q .req q6 - t2q .req q7 - t3q .req q8 - t4q .req q9 - T2 .req q9 - - s1l .req d20 - s1h .req d21 - s2l .req d22 - s2h .req d23 - s3l .req d24 - s3h .req d25 - s4l .req d26 - s4h .req d27 - - MASK .req d28 - SHASH2_p8 .req d28 - - k16 .req d29 - k32 .req d30 - k48 .req d31 - SHASH2_p64 .req d31 - - HH .req q10 - HH3 .req q11 - HH4 .req q12 - HH34 .req q13 - - HH_L .req d20 - HH_H .req d21 - HH3_L .req d22 - HH3_H .req d23 - HH4_L .req d24 - HH4_H .req d25 - HH34_L .req d26 - HH34_H .req d27 - SHASH2_H .req d29 - - XL2 .req q5 - XM2 .req q6 - XH2 .req q7 - T3 .req q8 - - XL2_L .req d10 - XL2_H .req d11 - XM2_L .req d12 - XM2_H .req d13 - T3_L .req d16 - T3_H .req d17 - - .text - .fpu crypto-neon-fp-armv8 - - .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 - vmull.p64 \rd, \rn, \rm - .endm - - /* - * This implementation of 64x64 -> 128 bit polynomial multiplication - * using vmull.p8 instructions (8x8 -> 16) is taken from the paper - * "Fast Software Polynomial Multiplication on ARM Processors Using - * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and - * Ricardo Dahab (https://hal.inria.fr/hal-01506572) - * - * It has been slightly tweaked for in-order performance, and to allow - * 'rq' to overlap with 'ad' or 'bd'. 
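[Editorial note, not part of the deleted file, for orientation before the macro itself: both __pmull_p64 and __pmull_p8 compute a 64x64 -> 128-bit carry-less product — one vmull.p64 where available, eight 8x8 vmull.p8 partial products otherwise. A scalar model, our sketch with hypothetical names:]

#include <stdint.h>

struct u128 { uint64_t lo, hi; };

/* carry-less (polynomial) multiply: XOR instead of add, no carries */
static struct u128 clmul64(uint64_t a, uint64_t b)
{
	struct u128 r = { 0, 0 };

	for (int i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			r.lo ^= a << i;
			if (i)
				r.hi ^= a >> (64 - i);
		}
	}
	return r;
}

[GHASH then reduces the 128-bit product modulo x^128 + x^7 + x^2 + x + 1, which is what the __pmull_reduce_* macros below implement; the 0xe1-based MASK constant encodes that reduction polynomial in bit-reflected form.]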
- */ - .macro __pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l - vext.8 t0l, \ad, \ad, #1 @ A1 - .ifc \b1, t4l - vext.8 t4l, \bd, \bd, #1 @ B1 - .endif - vmull.p8 t0q, t0l, \bd @ F = A1*B - vext.8 t1l, \ad, \ad, #2 @ A2 - vmull.p8 t4q, \ad, \b1 @ E = A*B1 - .ifc \b2, t3l - vext.8 t3l, \bd, \bd, #2 @ B2 - .endif - vmull.p8 t1q, t1l, \bd @ H = A2*B - vext.8 t2l, \ad, \ad, #3 @ A3 - vmull.p8 t3q, \ad, \b2 @ G = A*B2 - veor t0q, t0q, t4q @ L = E + F - .ifc \b3, t4l - vext.8 t4l, \bd, \bd, #3 @ B3 - .endif - vmull.p8 t2q, t2l, \bd @ J = A3*B - veor t0l, t0l, t0h @ t0 = (L) (P0 + P1) << 8 - veor t1q, t1q, t3q @ M = G + H - .ifc \b4, t3l - vext.8 t3l, \bd, \bd, #4 @ B4 - .endif - vmull.p8 t4q, \ad, \b3 @ I = A*B3 - veor t1l, t1l, t1h @ t1 = (M) (P2 + P3) << 16 - vmull.p8 t3q, \ad, \b4 @ K = A*B4 - vand t0h, t0h, k48 - vand t1h, t1h, k32 - veor t2q, t2q, t4q @ N = I + J - veor t0l, t0l, t0h - veor t1l, t1l, t1h - veor t2l, t2l, t2h @ t2 = (N) (P4 + P5) << 24 - vand t2h, t2h, k16 - veor t3l, t3l, t3h @ t3 = (K) (P6 + P7) << 32 - vmov.i64 t3h, #0 - vext.8 t0q, t0q, t0q, #15 - veor t2l, t2l, t2h - vext.8 t1q, t1q, t1q, #14 - vmull.p8 \rq, \ad, \bd @ D = A*B - vext.8 t2q, t2q, t2q, #13 - vext.8 t3q, t3q, t3q, #12 - veor t0q, t0q, t1q - veor t2q, t2q, t3q - veor \rq, \rq, t0q - veor \rq, \rq, t2q - .endm - - // - // PMULL (64x64->128) based reduction for CPUs that can do - // it in a single instruction. - // - .macro __pmull_reduce_p64 - vmull.p64 T1, XL_L, MASK - - veor XH_L, XH_L, XM_H - vext.8 T1, T1, T1, #8 - veor XL_H, XL_H, XM_L - veor T1, T1, XL - - vmull.p64 XL, T1_H, MASK - .endm - - // - // Alternative reduction for CPUs that lack support for the - // 64x64->128 PMULL instruction - // - .macro __pmull_reduce_p8 - veor XL_H, XL_H, XM_L - veor XH_L, XH_L, XM_H - - vshl.i64 T1, XL, #57 - vshl.i64 T2, XL, #62 - veor T1, T1, T2 - vshl.i64 T2, XL, #63 - veor T1, T1, T2 - veor XL_H, XL_H, T1_L - veor XH_L, XH_L, T1_H - - vshr.u64 T1, XL, #1 - veor XH, XH, XL - veor XL, XL, T1 - vshr.u64 T1, T1, #6 - vshr.u64 XL, XL, #1 - .endm - - .macro ghash_update, pn - vld1.64 {XL}, [r1] - - /* do the head block first, if supplied */ - ldr ip, [sp] - teq ip, #0 - beq 0f - vld1.64 {T1}, [ip] - teq r0, #0 - b 3f - -0: .ifc \pn, p64 - tst r0, #3 // skip until #blocks is a - bne 2f // round multiple of 4 - - vld1.8 {XL2-XM2}, [r2]! -1: vld1.8 {T3-T2}, [r2]! - vrev64.8 XL2, XL2 - vrev64.8 XM2, XM2 - - subs r0, r0, #4 - - vext.8 T1, XL2, XL2, #8 - veor XL2_H, XL2_H, XL_L - veor XL, XL, T1 - - vrev64.8 T3, T3 - vrev64.8 T1, T2 - - vmull.p64 XH, HH4_H, XL_H // a1 * b1 - veor XL2_H, XL2_H, XL_H - vmull.p64 XL, HH4_L, XL_L // a0 * b0 - vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0) - - vmull.p64 XH2, HH3_H, XM2_L // a1 * b1 - veor XM2_L, XM2_L, XM2_H - vmull.p64 XL2, HH3_L, XM2_H // a0 * b0 - vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - vmull.p64 XH2, HH_H, T3_L // a1 * b1 - veor T3_L, T3_L, T3_H - vmull.p64 XL2, HH_L, T3_H // a0 * b0 - vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - vmull.p64 XH2, SHASH_H, T1_L // a1 * b1 - veor T1_L, T1_L, T1_H - vmull.p64 XL2, SHASH_L, T1_H // a0 * b0 - vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0) - - veor XH, XH, XH2 - veor XL, XL, XL2 - veor XM, XM, XM2 - - beq 4f - - vld1.8 {XL2-XM2}, [r2]! - - veor T1, XL, XH - veor XM, XM, T1 - - __pmull_reduce_p64 - - veor T1, T1, XH - veor XL, XL, T1 - - b 1b - .endif - -2: vld1.64 {T1}, [r2]! 
- subs r0, r0, #1 - -3: /* multiply XL by SHASH in GF(2^128) */ -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev64.8 T1, T1 -#endif - vext.8 IN1, T1, T1, #8 - veor T1_L, T1_L, XL_H - veor XL, XL, IN1 - - __pmull_\pn XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h @ a1 * b1 - veor T1, T1, XL - __pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0 - __pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0) - -4: veor T1, XL, XH - veor XM, XM, T1 - - __pmull_reduce_\pn - - veor T1, T1, XH - veor XL, XL, T1 - - bne 0b - - vst1.64 {XL}, [r1] - bx lr - .endm - - /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) - */ -ENTRY(pmull_ghash_update_p64) - vld1.64 {SHASH}, [r3]! - vld1.64 {HH}, [r3]! - vld1.64 {HH3-HH4}, [r3] - - veor SHASH2_p64, SHASH_L, SHASH_H - veor SHASH2_H, HH_L, HH_H - veor HH34_L, HH3_L, HH3_H - veor HH34_H, HH4_L, HH4_H - - vmov.i8 MASK, #0xe1 - vshl.u64 MASK, MASK, #57 - - ghash_update p64 -ENDPROC(pmull_ghash_update_p64) - -ENTRY(pmull_ghash_update_p8) - vld1.64 {SHASH}, [r3] - veor SHASH2_p8, SHASH_L, SHASH_H - - vext.8 s1l, SHASH_L, SHASH_L, #1 - vext.8 s2l, SHASH_L, SHASH_L, #2 - vext.8 s3l, SHASH_L, SHASH_L, #3 - vext.8 s4l, SHASH_L, SHASH_L, #4 - vext.8 s1h, SHASH_H, SHASH_H, #1 - vext.8 s2h, SHASH_H, SHASH_H, #2 - vext.8 s3h, SHASH_H, SHASH_H, #3 - vext.8 s4h, SHASH_H, SHASH_H, #4 - - vmov.i64 k16, #0xffff - vmov.i64 k32, #0xffffffff - vmov.i64 k48, #0xffffffffffff - - ghash_update p8 -ENDPROC(pmull_ghash_update_p8) diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S deleted file mode 100644 index 434d80ab531c2a600fbcffc89c21e6a8ad5ef284..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/nh-neon-core.S +++ /dev/null @@ -1,116 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NH - ε-almost-universal hash function, NEON accelerated version - * - * Copyright 2018 Google LLC - * - * Author: Eric Biggers - */ - -#include - - .text - .fpu neon - - KEY .req r0 - MESSAGE .req r1 - MESSAGE_LEN .req r2 - HASH .req r3 - - PASS0_SUMS .req q0 - PASS0_SUM_A .req d0 - PASS0_SUM_B .req d1 - PASS1_SUMS .req q1 - PASS1_SUM_A .req d2 - PASS1_SUM_B .req d3 - PASS2_SUMS .req q2 - PASS2_SUM_A .req d4 - PASS2_SUM_B .req d5 - PASS3_SUMS .req q3 - PASS3_SUM_A .req d6 - PASS3_SUM_B .req d7 - K0 .req q4 - K1 .req q5 - K2 .req q6 - K3 .req q7 - T0 .req q8 - T0_L .req d16 - T0_H .req d17 - T1 .req q9 - T1_L .req d18 - T1_H .req d19 - T2 .req q10 - T2_L .req d20 - T2_H .req d21 - T3 .req q11 - T3_L .req d22 - T3_H .req d23 - -.macro _nh_stride k0, k1, k2, k3 - - // Load next message stride - vld1.8 {T3}, [MESSAGE]! - - // Load next key stride - vld1.32 {\k3}, [KEY]! - - // Add message words to key words - vadd.u32 T0, T3, \k0 - vadd.u32 T1, T3, \k1 - vadd.u32 T2, T3, \k2 - vadd.u32 T3, T3, \k3 - - // Multiply 32x32 => 64 and accumulate - vmlal.u32 PASS0_SUMS, T0_L, T0_H - vmlal.u32 PASS1_SUMS, T1_L, T1_H - vmlal.u32 PASS2_SUMS, T2_L, T2_H - vmlal.u32 PASS3_SUMS, T3_L, T3_H -.endm - -/* - * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) - * - * It's guaranteed that message_len % 16 == 0. - */ -ENTRY(nh_neon) - - vld1.32 {K0,K1}, [KEY]! - vmov.u64 PASS0_SUMS, #0 - vmov.u64 PASS1_SUMS, #0 - vld1.32 {K2}, [KEY]! 
- vmov.u64 PASS2_SUMS, #0 - vmov.u64 PASS3_SUMS, #0 - - subs MESSAGE_LEN, MESSAGE_LEN, #64 - blt .Lloop4_done -.Lloop4: - _nh_stride K0, K1, K2, K3 - _nh_stride K1, K2, K3, K0 - _nh_stride K2, K3, K0, K1 - _nh_stride K3, K0, K1, K2 - subs MESSAGE_LEN, MESSAGE_LEN, #64 - bge .Lloop4 - -.Lloop4_done: - ands MESSAGE_LEN, MESSAGE_LEN, #63 - beq .Ldone - _nh_stride K0, K1, K2, K3 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K1, K2, K3, K0 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K2, K3, K0, K1 - -.Ldone: - // Sum the accumulators for each pass, then store the sums to 'hash' - vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B - vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B - vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B - vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B - vst1.8 {T0-T1}, [HASH] - bx lr -ENDPROC(nh_neon) diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S deleted file mode 100644 index f82cd8cf5a093f5bb79c911a45513864dcdbe271..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-armv4-large.S +++ /dev/null @@ -1,507 +0,0 @@ -#define __ARM_ARCH__ __LINUX_ARM_ARCH__ -@ SPDX-License-Identifier: GPL-2.0 - -@ This code is taken from the OpenSSL project but the author (Andy Polyakov) -@ has relicensed it under the GPLv2. Therefore this program is free software; -@ you can redistribute it and/or modify it under the terms of the GNU General -@ Public License version 2 as published by the Free Software Foundation. -@ -@ The original headers, including the original license headers, are -@ included below for completeness. - -@ ==================================================================== -@ Written by Andy Polyakov for the OpenSSL -@ project. The module is, however, dual licensed under OpenSSL and -@ CRYPTOGAMS licenses depending on where you obtain it. For further -@ details see http://www.openssl.org/~appro/cryptogams/. -@ ==================================================================== - -@ sha1_block procedure for ARMv4. -@ -@ January 2007. - -@ Size/performance trade-off -@ ==================================================================== -@ impl size in bytes comp cycles[*] measured performance -@ ==================================================================== -@ thumb 304 3212 4420 -@ armv4-small 392/+29% 1958/+64% 2250/+96% -@ armv4-compact 740/+89% 1552/+26% 1840/+22% -@ armv4-large 1420/+92% 1307/+19% 1370/+34%[***] -@ full unroll ~5100/+260% ~1260/+4% ~1300/+5% -@ ==================================================================== -@ thumb = same as 'small' but in Thumb instructions[**] and -@ with recurring code in two private functions; -@ small = detached Xload/update, loops are folded; -@ compact = detached Xload/update, 5x unroll; -@ large = interleaved Xload/update, 5x unroll; -@ full unroll = interleaved Xload/update, full unroll, estimated[!]; -@ -@ [*] Manually counted instructions in "grand" loop body. Measured -@ performance is affected by prologue and epilogue overhead, -@ i-cache availability, branch penalties, etc. -@ [**] While each Thumb instruction is twice smaller, they are not as -@ diverse as ARM ones: e.g., there are only two arithmetic -@ instructions with 3 arguments, no [fixed] rotate, addressing -@ modes are limited. As result it takes more instructions to do -@ the same job in Thumb, therefore the code is never twice as -@ small and always slower. -@ [***] which is also ~35% better than compiler generated code. 
Dual- -@ issue Cortex A8 core was measured to process input block in -@ ~990 cycles. - -@ August 2010. -@ -@ Rescheduling for dual-issue pipeline resulted in 13% improvement on -@ Cortex A8 core and in absolute terms ~870 cycles per input block -@ [or 13.6 cycles per byte]. - -@ February 2011. -@ -@ Profiler-assisted and platform-specific optimization resulted in 10% -@ improvement on Cortex A8 core and 12.2 cycles per byte. - -#include - -.text - -.align 2 -ENTRY(sha1_block_data_order) - stmdb sp!,{r4-r12,lr} - add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 - ldmia r0,{r3,r4,r5,r6,r7} -.Lloop: - ldr r8,.LK_00_19 - mov r14,sp - sub sp,sp,#15*4 - mov r5,r5,ror#30 - mov r6,r6,ror#30 - mov r7,r7,ror#30 @ [6] -.L_00_15: -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r6,r8,r6,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r4,r5 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r6,r8,r6,ror#2 @ E+=K_00_19 - eor r10,r4,r5 @ F_xx_xx - add r6,r6,r7,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r3,r10,ror#2 - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r6,r6,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r5,r8,r5,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r3,r4 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r5,r8,r5,ror#2 @ E+=K_00_19 - eor r10,r3,r4 @ F_xx_xx - add r5,r5,r6,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r7,r10,ror#2 - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r5,r5,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r4,r8,r4,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r7,r3 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r4,r8,r4,ror#2 @ E+=K_00_19 - eor r10,r7,r3 @ F_xx_xx - add r4,r4,r5,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r6,r10,ror#2 - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! 
- add r4,r4,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r3,r8,r3,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r6,r7 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r3,r8,r3,ror#2 @ E+=K_00_19 - eor r10,r6,r7 @ F_xx_xx - add r3,r3,r4,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r5,r10,ror#2 - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r3,r3,r10 @ E+=F_00_19(B,C,D) - cmp r14,sp - bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#25*4 -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - add r6,r6,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - add r5,r5,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - add r4,r4,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - add r3,r3,r10 @ E+=F_00_19(B,C,D) - - ldr r8,.LK_20_39 @ [+15+16*4] - cmn sp,#0 @ [+3], clear carry to denote 20_39 -.L_20_39_or_60_79: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- eor r10,r4,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_20_39(B,C,D) - ARM( teq r14,sp ) @ preserve carry - THUMB( mov r11,sp ) - THUMB( teq r14,r11 ) @ preserve carry - bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] - bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes - - ldr r8,.LK_40_59 - sub sp,sp,#20*4 @ [+2] -.L_40_59: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r4,r10,ror#2 @ F_xx_xx - and r11,r5,r6 @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_40_59(B,C,D) - add r7,r7,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - and r11,r4,r5 @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_40_59(B,C,D) - add r6,r6,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- and r10,r7,r10,ror#2 @ F_xx_xx - and r11,r3,r4 @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_40_59(B,C,D) - add r5,r5,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - and r11,r7,r3 @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_40_59(B,C,D) - add r4,r4,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - and r11,r6,r7 @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_40_59(B,C,D) - add r3,r3,r11,ror#2 - cmp r14,sp - bne .L_40_59 @ [+((12+5)*5+2)*4] - - ldr r8,.LK_60_79 - sub sp,sp,#20*4 - cmp sp,#0 @ set carry to denote 60_79 - b .L_20_39_or_60_79 @ [+4], spare 300 bytes -.L_done: - add sp,sp,#80*4 @ "deallocate" stack frame - ldmia r0,{r8,r9,r10,r11,r12} - add r3,r8,r3 - add r4,r9,r4 - add r5,r10,r5,ror#2 - add r6,r11,r6,ror#2 - add r7,r12,r7,ror#2 - stmia r0,{r3,r4,r5,r6,r7} - teq r1,r2 - bne .Lloop @ [+18], total 1307 - - ldmia sp!,{r4-r12,pc} -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -ENDPROC(sha1_block_data_order) -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S deleted file mode 100644 index 28d816a6a530777af2b1b4f32052afbbb7a28728..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-armv7-neon.S +++ /dev/null @@ -1,634 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function - * - * Copyright © 2013-2014 Jussi Kivilinna - */ - -#include -#include - -.syntax unified -.fpu neon - -.text - - -/* Context structure */ - -#define state_h0 0 -#define state_h1 4 -#define state_h2 8 -#define state_h3 12 -#define state_h4 16 - - -/* Constants */ - -#define K1 0x5A827999 -#define K2 0x6ED9EBA1 -#define K3 0x8F1BBCDC -#define K4 0xCA62C1D6 -.align 4 -.LK_VEC: -.LK1: .long K1, K1, K1, K1 -.LK2: .long K2, K2, K2, K2 -.LK3: .long K3, K3, K3, K3 -.LK4: .long K4, K4, K4, K4 - - -/* Register macros */ - -#define RSTATE r0 -#define RDATA r1 -#define RNBLKS r2 -#define ROLDSTACK r3 -#define RWK lr - -#define _a r4 -#define _b r5 -#define _c r6 -#define _d r7 -#define _e r8 - -#define RT0 r9 -#define RT1 r10 -#define RT2 r11 -#define RT3 r12 - -#define W0 q0 -#define W1 q7 -#define W2 q2 -#define W3 q3 -#define W4 q4 -#define W5 q6 -#define W6 q5 -#define W7 q1 - -#define tmp0 q8 -#define tmp1 q9 -#define tmp2 q10 -#define tmp3 q11 - -#define qK1 q12 -#define qK2 q13 -#define qK3 q14 -#define qK4 q15 - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define ARM_LE(code...) -#else -#define ARM_LE(code...) code -#endif - -/* Round function macros. 
*/ - -#define WK_offs(i) (((i) & 15) * 4) - -#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - bic RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - and RT1, c, b; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add RT0, RT0, RT3; \ - add e, e, RT1; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; - -#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, d, b; \ - add e, e, a, ror #(32 - 5); \ - eor RT0, RT0, c; \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT3; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT0; \ - -#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ldr RT3, [sp, WK_offs(i)]; \ - pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - eor RT0, b, c; \ - and RT1, b, c; \ - add e, e, a, ror #(32 - 5); \ - pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - and RT0, RT0, d; \ - add RT1, RT1, RT3; \ - add e, e, RT0; \ - ror b, #(32 - 30); \ - pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ - add e, e, RT1; - -#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define R(a,b,c,d,e,f,i) \ - _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ - W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) - -#define dummy(...) - - -/* Input expansion macros. 
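
The _R_F1, _R_F2/_R_F4 and _R_F3 macros above are the three SHA-1 boolean round functions: Ch ("bic RT0,d,b" plus "and RT1,c,b"), parity, and Maj split into two disjoint terms so they can be summed with add. A minimal C sketch of the round they implement — illustrative only, none of these names come from the kernel source:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

static uint32_t f1(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) | (~b & d);		/* Ch, rounds 0-19 */
}

static uint32_t f2(uint32_t b, uint32_t c, uint32_t d)
{
	return b ^ c ^ d;			/* parity, rounds 20-39 and 60-79 */
}

static uint32_t f3(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) + ((b ^ c) & d);		/* Maj, as two disjoint terms */
}

/* One SHA-1 round. The assembly never shuffles data like this: it
 * rotates the register roles across successive _R() invocations
 * instead, and folds the rol(b,30) into later "ror #2" operands. */
static void sha1_round(uint32_t v[5], uint32_t w, uint32_t k,
		       uint32_t (*f)(uint32_t, uint32_t, uint32_t))
{
	uint32_t t = rol32(v[0], 5) + f(v[1], v[2], v[3]) + v[4] + w + k;

	v[4] = v[3];
	v[3] = v[2];
	v[2] = rol32(v[1], 30);
	v[1] = v[0];
	v[0] = t;
}
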
*/ - -/********* Precalc macros for rounds 0-15 *************************************/ - -#define W_PRECALC_00_15() \ - add RWK, sp, #(WK_offs(0)); \ - \ - vld1.32 {W0, W7}, [RDATA]!; \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - vld1.32 {W6, W5}, [RDATA]!; \ - vadd.u32 tmp0, W0, curK; \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - vadd.u32 tmp1, W7, curK; \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - vadd.u32 tmp2, W6, curK; \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - vadd.u32 tmp3, W5, curK; \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - -#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W0, W7}, [RDATA]!; \ - -#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(0)); \ - -#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ - -#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vld1.32 {W6, W5}, [RDATA]!; \ - -#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W0, curK; \ - -#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ - -#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ - -#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp1, W7, curK; \ - -#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ - -#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp2, W6, curK; \ - -#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0, tmp1}, [RWK]!; \ - -#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp3, W5, curK; \ - -#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp2, tmp3}, [RWK]; \ - - -/********* Precalc macros for rounds 16-31 ************************************/ - -#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0; \ - vext.8 W, W_m16, W_m12, #8; \ - -#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i)); \ - vext.8 tmp0, W_m04, tmp0, #4; \ - -#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W_m16; \ - veor.32 W, W, W_m08; \ - -#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp1, tmp1; \ - veor W, W, tmp0; \ - -#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp0, W, #1; \ - -#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp1, tmp1, W, #(16-12); \ - vshr.u32 W, W, #31; \ - -#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr tmp0, tmp0, W; \ - vshr.u32 W, tmp1, #30; \ - -#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, tmp1, #2; \ - -#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor tmp0, tmp0, W; \ - -#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0, tmp1; \ - -#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - 
vst1.32 {tmp0}, [RWK]; - - -/********* Precalc macros for rounds 32-79 ************************************/ - -#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m28; \ - -#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vext.8 tmp0, W_m08, W_m04, #8; \ - -#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, W_m16; \ - -#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - veor W, tmp0; \ - -#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - add RWK, sp, #(WK_offs(i&~3)); \ - -#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshl.u32 tmp1, W, #2; \ - -#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vshr.u32 tmp0, W, #30; \ - -#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vorr W, tmp0, tmp1; \ - -#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vadd.u32 tmp0, W, curK; \ - -#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ - vst1.32 {tmp0}, [RWK]; - - -/* - * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. - * - * unsigned int - * sha1_transform_neon (void *ctx, const unsigned char *data, - * unsigned int nblks) - */ -.align 3 -ENTRY(sha1_transform_neon) - /* input: - * r0: ctx, CTX - * r1: data (64*nblks bytes) - * r2: nblks - */ - - cmp RNBLKS, #0; - beq .Ldo_nothing; - - push {r4-r12, lr}; - /*vpush {q4-q7};*/ - - adr RT3, .LK_VEC; - - mov ROLDSTACK, sp; - - /* Align stack. */ - sub RT0, sp, #(16*4); - and RT0, #(~(16-1)); - mov sp, RT0; - - vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ - - /* Get the values of the chaining variables. */ - ldm RSTATE, {_a-_e}; - - vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ - -#undef curK -#define curK qK1 - /* Precalc 0-15. */ - W_PRECALC_00_15(); - -.Loop: - /* Transform 0-15 + Precalc 16-31. 
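
The WPRECALC_16_31_* and WPRECALC_32_79_* steps above interleave four-lane NEON computation of the SHA-1 message schedule with the scalar rounds. The recurrence being vectorised, as a C sketch with invented names:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

/* Expand one 16-word block into the 80-word SHA-1 schedule. */
static void sha1_schedule(const uint32_t blk[16], uint32_t w[80])
{
	int i;

	for (i = 0; i < 16; i++)
		w[i] = blk[i];		/* after byte-swapping on little-endian */
	for (i = 16; i < 80; i++)
		w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
}

From round 32 onward the equivalent form w[i] = rol32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2) is used instead — that is what the three veor steps against W_m28, W_m16 and the W_m08/W_m04 pair plus the vshl/vshr/vorr rotate-by-2 compute — because it has no dependency inside a four-word vector; the 16-31 steps still carry the w[i-3] dependency, hence their extra vext/shift fixup dance.
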
*/ - _R( _a, _b, _c, _d, _e, F1, 0, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 1, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 2, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, - W4, W5, W6, W7, W0, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 3, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, - W4, W5, W6, W7, W0, _, _, _ ); - -#undef curK -#define curK qK2 - _R( _b, _c, _d, _e, _a, F1, 4, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 5, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 6, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, - W3, W4, W5, W6, W7, _, _, _ ); - _R( _d, _e, _a, _b, _c, F1, 7, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, - W3, W4, W5, W6, W7, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F1, 8, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 9, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 10, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, - W2, W3, W4, W5, W6, _, _, _ ); - _R( _e, _a, _b, _c, _d, F1, 11, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, - W2, W3, W4, W5, W6, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F1, 12, - WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _c, _d, _e, _a, _b, F1, 13, - WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _b, _c, _d, _e, _a, F1, 14, - WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, - W1, W2, W3, W4, W5, _, _, _ ); - _R( _a, _b, _c, _d, _e, F1, 15, - WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, - W1, W2, W3, W4, W5, _, _, _ ); - - /* Transform 16-63 + Precalc 32-79. 
*/ - _R( _e, _a, _b, _c, _d, F1, 16, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _d, _e, _a, _b, _c, F1, 17, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _c, _d, _e, _a, _b, F1, 18, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F1, 19, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _a, _b, _c, _d, _e, F2, 20, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _e, _a, _b, _c, _d, F2, 21, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _d, _e, _a, _b, _c, F2, 22, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F2, 23, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, - W7, W0, W1, W2, W3, W4, W5, W6); - -#undef curK -#define curK qK3 - _R( _b, _c, _d, _e, _a, F2, 24, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _a, _b, _c, _d, _e, F2, 25, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _e, _a, _b, _c, _d, F2, 26, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F2, 27, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, - W6, W7, W0, W1, W2, W3, W4, W5); - - _R( _c, _d, _e, _a, _b, F2, 28, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _b, _c, _d, _e, _a, F2, 29, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _a, _b, _c, _d, _e, F2, 30, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F2, 31, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, - W5, W6, W7, W0, W1, W2, W3, W4); - - _R( _d, _e, _a, _b, _c, F2, 32, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _c, _d, _e, _a, _b, F2, 33, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _b, _c, _d, _e, _a, F2, 34, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - _R( _a, _b, _c, _d, _e, F2, 35, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, - W4, W5, W6, W7, W0, W1, W2, W3); - - _R( _e, _a, _b, _c, _d, F2, 36, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _d, _e, _a, _b, _c, F2, 37, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _c, _d, _e, _a, _b, F2, 38, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - _R( _b, _c, _d, _e, _a, F2, 39, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, - W3, W4, W5, W6, W7, W0, W1, W2); - - _R( _a, _b, _c, _d, _e, F3, 40, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _e, _a, _b, _c, _d, F3, 41, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _d, _e, _a, _b, _c, F3, 42, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - _R( _c, _d, _e, _a, _b, F3, 43, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, - W2, W3, W4, W5, W6, W7, W0, W1); - -#undef curK -#define curK qK4 - _R( _b, _c, _d, 
_e, _a, F3, 44, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _a, _b, _c, _d, _e, F3, 45, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _e, _a, _b, _c, _d, F3, 46, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - _R( _d, _e, _a, _b, _c, F3, 47, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, - W1, W2, W3, W4, W5, W6, W7, W0); - - _R( _c, _d, _e, _a, _b, F3, 48, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _b, _c, _d, _e, _a, F3, 49, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _a, _b, _c, _d, _e, F3, 50, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - _R( _e, _a, _b, _c, _d, F3, 51, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, - W0, W1, W2, W3, W4, W5, W6, W7); - - _R( _d, _e, _a, _b, _c, F3, 52, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _c, _d, _e, _a, _b, F3, 53, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _b, _c, _d, _e, _a, F3, 54, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - _R( _a, _b, _c, _d, _e, F3, 55, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, - W7, W0, W1, W2, W3, W4, W5, W6); - - _R( _e, _a, _b, _c, _d, F3, 56, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _d, _e, _a, _b, _c, F3, 57, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _c, _d, _e, _a, _b, F3, 58, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - _R( _b, _c, _d, _e, _a, F3, 59, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, - W6, W7, W0, W1, W2, W3, W4, W5); - - subs RNBLKS, #1; - - _R( _a, _b, _c, _d, _e, F4, 60, - WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _e, _a, _b, _c, _d, F4, 61, - WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _d, _e, _a, _b, _c, F4, 62, - WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - _R( _c, _d, _e, _a, _b, F4, 63, - WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, - W5, W6, W7, W0, W1, W2, W3, W4); - - beq .Lend; - - /* Transform 64-79 + Precalc 0-15 of next block. 
*/ -#undef curK -#define curK qK1 - _R( _b, _c, _d, _e, _a, F4, 64, - WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 65, - WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 66, - WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 67, - WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _c, _d, _e, _a, _b, F4, 68, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 69, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 70, - WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _e, _a, _b, _c, _d, F4, 71, - WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _d, _e, _a, _b, _c, F4, 72, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 73, - dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 74, - WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _a, _b, _c, _d, _e, F4, 75, - WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - - _R( _e, _a, _b, _c, _d, F4, 76, - WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _d, _e, _a, _b, _c, F4, 77, - WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _c, _d, _e, _a, _b, F4, 78, - WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); - _R( _b, _c, _d, _e, _a, F4, 79, - WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - add _e, RT0; - stm RSTATE, {_a-_e}; - - b .Loop; - -.Lend: - /* Transform 64-79 */ - R( _b, _c, _d, _e, _a, F4, 64 ); - R( _a, _b, _c, _d, _e, F4, 65 ); - R( _e, _a, _b, _c, _d, F4, 66 ); - R( _d, _e, _a, _b, _c, F4, 67 ); - R( _c, _d, _e, _a, _b, F4, 68 ); - R( _b, _c, _d, _e, _a, F4, 69 ); - R( _a, _b, _c, _d, _e, F4, 70 ); - R( _e, _a, _b, _c, _d, F4, 71 ); - R( _d, _e, _a, _b, _c, F4, 72 ); - R( _c, _d, _e, _a, _b, F4, 73 ); - R( _b, _c, _d, _e, _a, F4, 74 ); - R( _a, _b, _c, _d, _e, F4, 75 ); - R( _e, _a, _b, _c, _d, F4, 76 ); - R( _d, _e, _a, _b, _c, F4, 77 ); - R( _c, _d, _e, _a, _b, F4, 78 ); - R( _b, _c, _d, _e, _a, F4, 79 ); - - mov sp, ROLDSTACK; - - /* Update the chaining variables. */ - ldm RSTATE, {RT0-RT3}; - add _a, RT0; - ldr RT0, [RSTATE, #state_h4]; - add _b, RT1; - add _c, RT2; - add _d, RT3; - /*vpop {q4-q7};*/ - add _e, RT0; - stm RSTATE, {_a-_e}; - - pop {r4-r12, pc}; - -.Ldo_nothing: - bx lr -ENDPROC(sha1_transform_neon) diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S deleted file mode 100644 index 49a74a441aec78e4749a207b741ea8b514b4bde7..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha1-ce-core.S +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd. 
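
Both exit paths above end with the same Davies-Meyer feed-forward — the "ldm RSTATE, {RT0-RT3} ... stm RSTATE, {_a-_e}" sequences — which in C terms is simply (sketch, names invented):

#include <stdint.h>

/* Fold the final working variables back into the chaining state. */
static void sha1_feed_forward(uint32_t state[5], const uint32_t abcde[5])
{
	int i;

	for (i = 0; i < 5; i++)
		state[i] += abcde[i];
}
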
- * Author: Ard Biesheuvel - */ - -#include -#include - - .text - .fpu crypto-neon-fp-armv8 - - k0 .req q0 - k1 .req q1 - k2 .req q2 - k3 .req q3 - - ta0 .req q4 - ta1 .req q5 - tb0 .req q5 - tb1 .req q4 - - dga .req q6 - dgb .req q7 - dgbs .req s28 - - dg0 .req q12 - dg1a0 .req q13 - dg1a1 .req q14 - dg1b0 .req q14 - dg1b1 .req q13 - - .macro add_only, op, ev, rc, s0, dg1 - .ifnb \s0 - vadd.u32 tb\ev, q\s0, \rc - .endif - sha1h.32 dg1b\ev, dg0 - .ifb \dg1 - sha1\op\().32 dg0, dg1a\ev, ta\ev - .else - sha1\op\().32 dg0, \dg1, ta\ev - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0.32 q\s0, q\s1, q\s2 - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1.32 q\s0, q\s3 - .endm - - .align 6 -.Lsha1_rcon: - .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 - .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 - .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc - .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 - - /* - * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, - * int blocks); - */ -ENTRY(sha1_ce_transform) - /* load round constants */ - adr ip, .Lsha1_rcon - vld1.32 {k0-k1}, [ip, :128]! - vld1.32 {k2-k3}, [ip, :128] - - /* load state */ - vld1.32 {dga}, [r0] - vldr dgbs, [r0, #16] - - /* load input */ -0: vld1.32 {q8-q9}, [r1]! - vld1.32 {q10-q11}, [r1]! - subs r2, r2, #1 - -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev32.8 q8, q8 - vrev32.8 q9, q9 - vrev32.8 q10, q10 - vrev32.8 q11, q11 -#endif - - vadd.u32 ta0, q8, k0 - vmov dg0, dga - - add_update c, 0, k0, 8, 9, 10, 11, dgb - add_update c, 1, k0, 9, 10, 11, 8 - add_update c, 0, k0, 10, 11, 8, 9 - add_update c, 1, k0, 11, 8, 9, 10 - add_update c, 0, k1, 8, 9, 10, 11 - - add_update p, 1, k1, 9, 10, 11, 8 - add_update p, 0, k1, 10, 11, 8, 9 - add_update p, 1, k1, 11, 8, 9, 10 - add_update p, 0, k1, 8, 9, 10, 11 - add_update p, 1, k2, 9, 10, 11, 8 - - add_update m, 0, k2, 10, 11, 8, 9 - add_update m, 1, k2, 11, 8, 9, 10 - add_update m, 0, k2, 8, 9, 10, 11 - add_update m, 1, k2, 9, 10, 11, 8 - add_update m, 0, k3, 10, 11, 8, 9 - - add_update p, 1, k3, 11, 8, 9, 10 - add_only p, 0, k3, 9 - add_only p, 1, k3, 10 - add_only p, 0, k3, 11 - add_only p, 1 - - /* update state */ - vadd.u32 dga, dga, dg0 - vadd.u32 dgb, dgb, dg1a0 - bne 0b - - /* store new state */ - vst1.32 {dga}, [r0] - vstr dgbs, [r0, #16] - bx lr -ENDPROC(sha1_ce_transform) diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S deleted file mode 100644 index 4ad517577e230ddd04d7891f35b5ec7e7196931e..0000000000000000000000000000000000000000 --- a/arch/arm/crypto/sha2-ce-core.S +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2015 Linaro Ltd. - * Author: Ard Biesheuvel - */ - -#include -#include - - .text - .fpu crypto-neon-fp-armv8 - - k0 .req q7 - k1 .req q8 - rk .req r3 - - ta0 .req q9 - ta1 .req q10 - tb0 .req q10 - tb1 .req q9 - - dga .req q11 - dgb .req q12 - - dg0 .req q13 - dg1 .req q14 - dg2 .req q15 - - .macro add_only, ev, s0 - vmov dg2, dg0 - .ifnb \s0 - vld1.32 {k\ev}, [rk, :128]! 
- .endif - sha256h.32 dg0, dg1, tb\ev - sha256h2.32 dg1, dg2, tb\ev - .ifnb \s0 - vadd.u32 ta\ev, q\s0, k\ev - .endif - .endm - - .macro add_update, ev, s0, s1, s2, s3 - sha256su0.32 q\s0, q\s1 - add_only \ev, \s1 - sha256su1.32 q\s0, q\s2, q\s3 - .endm - - .align 6 -.Lsha256_rcon: - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - - /* - * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, - int blocks); - */ -ENTRY(sha2_ce_transform) - /* load state */ - vld1.32 {dga-dgb}, [r0] - - /* load input */ -0: vld1.32 {q0-q1}, [r1]! - vld1.32 {q2-q3}, [r1]! - subs r2, r2, #1 - -#ifndef CONFIG_CPU_BIG_ENDIAN - vrev32.8 q0, q0 - vrev32.8 q1, q1 - vrev32.8 q2, q2 - vrev32.8 q3, q3 -#endif - - /* load first round constant */ - adr rk, .Lsha256_rcon - vld1.32 {k0}, [rk, :128]! - - vadd.u32 ta0, q0, k0 - vmov dg0, dga - vmov dg1, dgb - - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - add_update 1, 0, 1, 2, 3 - add_update 0, 1, 2, 3, 0 - add_update 1, 2, 3, 0, 1 - add_update 0, 3, 0, 1, 2 - - add_only 1, 1 - add_only 0, 2 - add_only 1, 3 - add_only 0 - - /* update state */ - vadd.u32 dga, dga, dg0 - vadd.u32 dgb, dgb, dg1 - bne 0b - - /* store new state */ - vst1.32 {dga-dgb}, [r0] - bx lr -ENDPROC(sha2_ce_transform) diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S deleted file mode 100644 index dfc6bfa430121673015fb927349b161c94798217..0000000000000000000000000000000000000000 --- a/arch/arm/include/asm/entry-macro-multi.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - -/* - * Interrupt handling. 
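
Each sha256h/sha256h2 pair above retires four SHA-256 rounds on the (abcd)/(efgh) register halves, while sha256su0/sha256su1 compute the schedule update w[i] = s1(w[i-2]) + w[i-7] + s0(w[i-15]) + w[i-16]. One round of the underlying compression, as an illustrative C sketch:

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One SHA-256 round over state s[8] = {a,b,c,d,e,f,g,h}. */
static void sha256_round(uint32_t s[8], uint32_t w, uint32_t k)
{
	uint32_t e = s[4], a = s[0];
	uint32_t S1  = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t ch  = (e & s[5]) ^ (~e & s[6]);
	uint32_t t1  = s[7] + S1 + ch + k + w;
	uint32_t S0  = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t maj = (a & s[1]) ^ (a & s[2]) ^ (s[1] & s[2]);

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + S0 + maj;
}
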
Preserves r7, r8, r9 - */ - .macro arch_irq_handler_default - get_irqnr_preamble r6, lr -1: get_irqnr_and_base r0, r2, r6, lr - movne r1, sp - @ - @ routine called with r0 = irq number, r1 = struct pt_regs * - @ - badrne lr, 1b - bne asm_do_IRQ - -#ifdef CONFIG_SMP - /* - * XXX - * - * this macro assumes that irqstat (r2) and base (r6) are - * preserved from get_irqnr_and_base above - */ - ALT_SMP(test_for_ipi r0, r2, r6, lr) - ALT_UP_B(9997f) - movne r1, sp - badrne lr, 1b - bne do_IPI -#endif -9997: - .endm - - .macro arch_irq_handler, symbol_name - .align 5 - .global \symbol_name -\symbol_name: - mov r8, lr - arch_irq_handler_default - ret r8 - .endm diff --git a/arch/arm/include/asm/hardware/entry-macro-iomd.S b/arch/arm/include/asm/hardware/entry-macro-iomd.S deleted file mode 100644 index f7692731e514359a6a8fb66cb229444b9cf9fabe..0000000000000000000000000000000000000000 --- a/arch/arm/include/asm/hardware/entry-macro-iomd.S +++ /dev/null @@ -1,131 +0,0 @@ -/* - * arch/arm/include/asm/hardware/entry-macro-iomd.S - * - * Low-level IRQ helper macros for IOC/IOMD based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -/* IOC / IOMD based hardware */ -#include - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldrb \irqstat, [\base, #IOMD_IRQREQB] @ get high priority first - ldr \tmp, =irq_prio_h - teq \irqstat, #0 -#ifdef IOMD_BASE - ldrbeq \irqstat, [\base, #IOMD_DMAREQ] @ get dma - addeq \tmp, \tmp, #256 @ irq_prio_h table size - teqeq \irqstat, #0 - bne 2406f -#endif - ldrbeq \irqstat, [\base, #IOMD_IRQREQA] @ get low priority - addeq \tmp, \tmp, #256 @ irq_prio_d table size - teqeq \irqstat, #0 -#ifdef IOMD_IRQREQC - ldrbeq \irqstat, [\base, #IOMD_IRQREQC] - addeq \tmp, \tmp, #256 @ irq_prio_l table size - teqeq \irqstat, #0 -#endif -#ifdef IOMD_IRQREQD - ldrbeq \irqstat, [\base, #IOMD_IRQREQD] - addeq \tmp, \tmp, #256 @ irq_prio_lc table size - teqeq \irqstat, #0 -#endif -2406: ldrbne \irqnr, [\tmp, \irqstat] @ get IRQ number - .endm - -/* - * Interrupt table (incorporates priority). Please note that we - * rely on the order of these tables (see above code). 
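
get_irqnr_and_base above resolves priorities without any bit-scanning loop: each request byte read from the IOC/IOMD indexes a 256-entry table whose entries are precomputed highest-priority IRQ numbers. In rough C terms (names invented):

#include <stdint.h>

/* 256-entry table in the style of irq_prio_h below: table[status]
 * holds the IRQ number of the highest-priority bit set in status. */
extern const uint8_t irq_prio_h[256];

static int iomd_decode(uint8_t irqreq)
{
	if (irqreq == 0)
		return -1;	/* nothing pending in this bank, try the next */
	return irq_prio_h[irqreq];
}
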
- */ - .align 5 -irq_prio_h: .byte 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 12, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 - .byte 13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10 -#ifdef IOMD_BASE -irq_prio_d: .byte 0,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 20,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 23,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 23,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 22,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 - .byte 21,16,17,16,18,16,17,16,19,16,17,16,18,16,17,16 -#endif -irq_prio_l: .byte 0, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 4, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - .byte 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 -#ifdef IOMD_IRQREQC -irq_prio_lc: .byte 24,24,25,24,26,26,26,26,27,27,27,27,27,27,27,27 - .byte 28,24,25,24,26,26,26,26,27,27,27,27,27,27,27,27 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 30,30,30,30,30,30,30,30,27,27,27,27,27,27,27,27 - .byte 30,30,30,30,30,30,30,30,27,27,27,27,27,27,27,27 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 
31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 -#endif -#ifdef IOMD_IRQREQD -irq_prio_ld: .byte 40,40,41,40,42,42,42,42,43,43,43,43,43,43,43,43 - .byte 44,40,41,40,42,42,42,42,43,43,43,43,43,43,43,43 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 46,46,46,46,46,46,46,46,43,43,43,43,43,43,43,43 - .byte 46,46,46,46,46,46,46,46,43,43,43,43,43,43,43,43 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 - .byte 47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47 -#endif - diff --git a/arch/arm/include/debug/8250.S b/arch/arm/include/debug/8250.S deleted file mode 100644 index e4a036f082c29c722e69f0cfbb0b9230c0653b56..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/8250.S +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/8250.S - * - * Copyright (C) 1994-2013 Russell King - */ -#include - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - -#ifdef CONFIG_DEBUG_UART_8250_WORD - .macro store, rd, rx:vararg - ARM_BE8(rev \rd, \rd) - str \rd, \rx - ARM_BE8(rev \rd, \rd) - .endm - - .macro load, rd, rx:vararg - ldr \rd, \rx - ARM_BE8(rev \rd, \rd) - .endm -#else - .macro store, rd, rx:vararg - strb \rd, \rx - .endm - - .macro load, rd, rx:vararg - ldrb \rd, \rx - .endm -#endif - -#define UART_SHIFT CONFIG_DEBUG_UART_8250_SHIFT - - .macro senduart,rd,rx - store \rd, [\rx, #UART_TX << UART_SHIFT] - .endm - - .macro busyuart,rd,rx -1002: load \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE - teq \rd, #UART_LSR_TEMT | UART_LSR_THRE - bne 1002b - .endm - - .macro waituart,rd,rx -#ifdef CONFIG_DEBUG_UART_8250_FLOW_CONTROL -1001: load \rd, [\rx, #UART_MSR << UART_SHIFT] - tst \rd, #UART_MSR_CTS - beq 1001b -#endif - .endm diff --git a/arch/arm/include/debug/asm9260.S b/arch/arm/include/debug/asm9260.S deleted file mode 100644 index 0da1eb6253318dcd18c4c81eb896e865712041c1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/asm9260.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - * Modified for ASM9260 by Oleksij Remepl - */ - - .macro addruart, rp, rv, tmp - ldr \rp, = CONFIG_DEBUG_UART_PHYS - ldr \rv, = CONFIG_DEBUG_UART_VIRT - .endm - - .macro waituart,rd,rx - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #0x50] @ TXDATA - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #0x60] @ STAT - tst \rd, #1 << 27 @ TXEMPTY - beq 1002b @ wait until transmit done - .endm diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S deleted file mode 100644 index 6c91cbaaa20be8a7a943703ae062a54c88279c8b..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/at91.S +++ /dev/null @@ -1,33 +0,0 @@ -/* 
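
The 8250 senduart/busyuart pair above is classic polled 16550 output: write THR, then spin until LSR reports both THRE and TEMT. A C rendering assuming a byte-mapped UART; the register names and bits are standard serial_reg.h values, the function itself is invented:

#include <stdint.h>

#define UART_TX		0	/* transmit holding register */
#define UART_LSR	5	/* line status register */
#define UART_LSR_THRE	0x20	/* THR empty */
#define UART_LSR_TEMT	0x40	/* transmitter empty */

static void debug8250_putc(volatile uint8_t *base, unsigned int shift, char c)
{
	base[UART_TX << shift] = (uint8_t)c;	/* senduart */
	while ((base[UART_LSR << shift] &	/* busyuart: spin until */
		(UART_LSR_TEMT | UART_LSR_THRE)) !=	/* FIFO and shifter */
	       (UART_LSR_TEMT | UART_LSR_THRE))		/* both drain */
		;
}
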
SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2003-2005 SAN People - * - * Debugging macro include header -*/ - -#define AT91_DBGU_SR (0x14) /* Status Register */ -#define AT91_DBGU_THR (0x1c) /* Transmitter Holding Register */ -#define AT91_DBGU_TXRDY (1 << 1) /* Transmitter Ready */ -#define AT91_DBGU_TXEMPTY (1 << 9) /* Transmitter Empty */ - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS @ System peripherals (phys address) - ldr \rv, =CONFIG_DEBUG_UART_VIRT @ System peripherals (virt address) - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #(AT91_DBGU_THR)] @ Write to Transmitter Holding Register - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register - tst \rd, #AT91_DBGU_TXRDY @ DBGU_TXRDY = 1 when ready to transmit - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #(AT91_DBGU_SR)] @ Read Status Register - tst \rd, #AT91_DBGU_TXEMPTY @ DBGU_TXEMPTY = 1 when transmission complete - beq 1001b - .endm - diff --git a/arch/arm/include/debug/bcm63xx.S b/arch/arm/include/debug/bcm63xx.S deleted file mode 100644 index 06a8962273960078c3749663396a0550a9d158db..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/bcm63xx.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Broadcom BCM63xx low-level UART debug - * - * Copyright (C) 2014 Broadcom Corporation - */ - -#include - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - - .macro senduart, rd, rx - /* word access do not work */ - strb \rd, [\rx, #UART_FIFO_REG] - .endm - - .macro waituart, rd, rx -1001: ldr \rd, [\rx, #UART_IR_REG] - tst \rd, #(1 << UART_IR_TXEMPTY) - beq 1001b - .endm - - .macro busyuart, rd, rx -1002: ldr \rd, [\rx, #UART_IR_REG] - tst \rd, #(1 << UART_IR_TXTRESH) - beq 1002b - .endm diff --git a/arch/arm/include/debug/brcmstb.S b/arch/arm/include/debug/brcmstb.S deleted file mode 100644 index bf8702ee8f86dff04e7f7d905b64c155c03ec912..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/brcmstb.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (C) 2016 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ -#include -#include - -/* Physical register offset and virtual register offset */ -#define REG_PHYS_BASE 0xf0000000 -#define REG_PHYS_BASE_V7 0x08000000 -#define REG_VIRT_BASE 0xfc000000 -#define REG_PHYS_ADDR(x) ((x) + REG_PHYS_BASE) -#define REG_PHYS_ADDR_V7(x) ((x) + REG_PHYS_BASE_V7) - -/* Product id can be read from here */ -#define SUN_TOP_CTRL_BASE REG_PHYS_ADDR(0x404000) -#define SUN_TOP_CTRL_BASE_V7 REG_PHYS_ADDR_V7(0x404000) - -#define UARTA_3390 REG_PHYS_ADDR(0x40a900) -#define UARTA_7250 REG_PHYS_ADDR(0x40b400) -#define UARTA_7255 REG_PHYS_ADDR(0x40c000) -#define UARTA_7260 UARTA_7255 -#define UARTA_7268 UARTA_7255 -#define UARTA_7271 UARTA_7268 -#define UARTA_7278 REG_PHYS_ADDR_V7(0x40c000) -#define UARTA_7364 REG_PHYS_ADDR(0x40b000) -#define UARTA_7366 UARTA_7364 -#define UARTA_74371 REG_PHYS_ADDR(0x406b00) -#define UARTA_7439 REG_PHYS_ADDR(0x40a900) -#define UARTA_7445 REG_PHYS_ADDR(0x40ab00) - -#define UART_SHIFT 2 - -#define checkuart(rp, rv, family_id, family) \ - /* Load family id */ \ - ldr rp, =family_id ; \ - /* Compare SUN_TOP_CTRL value against it */ \ - cmp rp, rv ; \ - /* Passed test, load address */ \ - ldreq rp, =UARTA_##family ; \ - /* Jump to save UART address */ \ - beq 91f - - .macro addruart, rp, rv, tmp - adr \rp, 99f @ actual addr of 99f - ldr \rv, [\rp] @ linked addr is stored there - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ linked brcmstb_uart_config - sub \tmp, \rp, \rv @ actual brcmstb_uart_config - ldr \rp, [\tmp] @ Load brcmstb_uart_config - cmp \rp, #1 @ needs initialization? - bne 100f @ no; go load the addresses - mov \rv, #0 @ yes; record init is done - str \rv, [\tmp] - - /* Check for V7 memory map if B53 */ - mrc p15, 0, \rv, c0, c0, 0 @ get Main ID register - ldr \rp, =ARM_CPU_PART_MASK - and \rv, \rv, \rp - ldr \rp, =ARM_CPU_PART_BRAHMA_B53 @ check for B53 CPU - cmp \rv, \rp - bne 10f - - /* if PERIPHBASE doesn't overlap REG_PHYS_BASE use V7 map */ - mrc p15, 1, \rv, c15, c3, 0 @ get PERIPHBASE from CBAR - ands \rv, \rv, #REG_PHYS_BASE - ldreq \rp, =SUN_TOP_CTRL_BASE_V7 - - /* Check SUN_TOP_CTRL base */ -10: ldrne \rp, =SUN_TOP_CTRL_BASE @ load SUN_TOP_CTRL PA - ldr \rv, [\rp, #0] @ get register contents -ARM_BE8( rev \rv, \rv ) - and \rv, \rv, #0xffffff00 @ strip revision bits [7:0] - - /* Chip specific detection starts here */ -20: checkuart(\rp, \rv, 0x33900000, 3390) -21: checkuart(\rp, \rv, 0x72500000, 7250) -22: checkuart(\rp, \rv, 0x72550000, 7255) -23: checkuart(\rp, \rv, 0x72600000, 7260) -24: checkuart(\rp, \rv, 0x72680000, 7268) -25: checkuart(\rp, \rv, 0x72710000, 7271) -26: checkuart(\rp, \rv, 0x72780000, 7278) -27: checkuart(\rp, \rv, 0x73640000, 7364) -28: checkuart(\rp, \rv, 0x73660000, 7366) -29: checkuart(\rp, \rv, 0x07437100, 74371) -30: checkuart(\rp, \rv, 0x74390000, 7439) -31: checkuart(\rp, \rv, 0x74450000, 7445) - - /* No valid UART found */ -90: mov \rp, #0 - /* fall through */ - - /* Record whichever UART we chose */ -91: str \rp, [\tmp, #4] @ Store in brcmstb_uart_phys - cmp \rp, #0 @ Valid UART address? - bne 92f @ Yes, go process it - str \rp, [\tmp, #8] @ Store 0 in brcmstb_uart_virt - b 100f @ Done -92: and \rv, \rp, #0xffffff @ offset within 16MB section - add \rv, \rv, #REG_VIRT_BASE - str \rv, [\tmp, #8] @ Store in brcmstb_uart_virt - b 100f - - .align -99: .word . 
- .word brcmstb_uart_config - .ltorg - - /* Load previously selected UART address */ -100: ldr \rp, [\tmp, #4] @ Load brcmstb_uart_phys - ldr \rv, [\tmp, #8] @ Load brcmstb_uart_virt - .endm - - .macro store, rd, rx:vararg -ARM_BE8( rev \rd, \rd ) - str \rd, \rx - .endm - - .macro load, rd, rx:vararg - ldr \rd, \rx -ARM_BE8( rev \rd, \rd ) - .endm - - .macro senduart,rd,rx - store \rd, [\rx, #UART_TX << UART_SHIFT] - .endm - - .macro busyuart,rd,rx -1002: load \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_TEMT | UART_LSR_THRE - teq \rd, #UART_LSR_TEMT | UART_LSR_THRE - bne 1002b - .endm - - .macro waituart,rd,rx - .endm - -/* - * Storage for the state maintained by the macros above. - * - * In the kernel proper, this data is located in arch/arm/mach-bcm/brcmstb.c. - * That's because this header is included from multiple files, and we only - * want a single copy of the data. In particular, the UART probing code above - * assumes it's running using physical addresses. This is true when this file - * is included from head.o, but not when included from debug.o. So we need - * to share the probe results between the two copies, rather than having - * to re-run the probing again later. - * - * In the decompressor, we put the symbol/storage right here, since common.c - * isn't included in the decompressor build. This symbol gets put in .text - * even though it's really data, since .data is discarded from the - * decompressor. Luckily, .text is writeable in the decompressor, unless - * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. - */ -#if defined(ZIMAGE) -brcmstb_uart_config: - /* Debug UART initialization required */ - .word 1 - /* Debug UART physical address */ - .word 0 - /* Debug UART virtual address */ - .word 0 -#endif diff --git a/arch/arm/include/debug/clps711x.S b/arch/arm/include/debug/clps711x.S deleted file mode 100644 index 774a67ac3877dd6baebb2dc8eefc96938feef191..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/clps711x.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2014 Alexander Shiyan - */ - -#ifndef CONFIG_DEBUG_CLPS711X_UART2 -#define CLPS711X_UART_PADDR (0x80000000 + 0x0000) -#define CLPS711X_UART_VADDR (0xfeff4000 + 0x0000) -#else -#define CLPS711X_UART_PADDR (0x80000000 + 0x1000) -#define CLPS711X_UART_VADDR (0xfeff4000 + 0x1000) -#endif - -#define SYSFLG (0x0140) -#define SYSFLG_UBUSY (1 << 11) -#define UARTDR (0x0480) - - .macro addruart, rp, rv, tmp - ldr \rv, =CLPS711X_UART_VADDR - ldr \rp, =CLPS711X_UART_PADDR - .endm - - .macro waituart,rd,rx - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #UARTDR] - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #SYSFLG] - tst \rd, #SYSFLG_UBUSY - bne 1001b - .endm diff --git a/arch/arm/include/debug/dc21285.S b/arch/arm/include/debug/dc21285.S deleted file mode 100644 index d7e8c71706abd9293174e8727b6bc2c2865893bf..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/dc21285.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-footbridge/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -#include - /* For EBSA285 debugging */ - .equ dc21285_high, ARMCSR_BASE & 0xff000000 - .equ dc21285_low, ARMCSR_BASE & 0x00ffffff - - .macro addruart, rp, rv, tmp - .if dc21285_low - mov \rp, #dc21285_low - .else - mov \rp, #0 - 
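
The "99: .word ." idiom in the brcmstb addruart above (omap2plus uses the same trick further down) stores a word's own link-time address in the word itself; subtracting that from the run-time address gives the relocation delta, so the macro can locate its config storage whether it executes from physical or virtual addresses. A hypothetical C equivalent:

#include <stdint.h>

static void *reloc_fixup(const uintptr_t *anchor, uintptr_t linked_sym)
{
	uintptr_t delta = (uintptr_t)anchor - anchor[0];	/* run - link */

	return (void *)(linked_sym + delta);
}
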
.endif - orr \rv, \rp, #dc21285_high - orr \rp, \rp, #0x42000000 - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #0x160] @ UARTDR - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x178] @ UARTFLG - tst \rd, #1 << 3 - bne 1001b - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/digicolor.S b/arch/arm/include/debug/digicolor.S deleted file mode 100644 index 256f5f4da2759d4a1e4f72ef3c5f0b9ab3c90478..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/digicolor.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header for Conexant Digicolor USART - * - * Copyright (C) 2014 Paradox Innovation Ltd. -*/ - -#define UA0_STATUS 0x0742 -#define UA0_EMI_REC 0x0744 - -#define UA0_STATUS_TX_READY 0x40 - -#ifdef CONFIG_DEBUG_UART_PHYS - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, #UA0_EMI_REC] - .endm - - .macro waituart,rd,rx - .endm - - .macro busyuart,rd,rx -1001: ldrb \rd, [\rx, #UA0_STATUS] - tst \rd, #UA0_STATUS_TX_READY - beq 1001b - .endm diff --git a/arch/arm/include/debug/efm32.S b/arch/arm/include/debug/efm32.S deleted file mode 100644 index 5ed5028306f4a605bff81df0782e5a8346773e45..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/efm32.S +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 Pengutronix - * Uwe Kleine-Koenig - */ - -#define UARTn_CMD 0x000c -#define UARTn_CMD_TXEN 0x0004 - -#define UARTn_STATUS 0x0010 -#define UARTn_STATUS_TXC 0x0020 -#define UARTn_STATUS_TXBL 0x0040 - -#define UARTn_TXDATA 0x0034 - - .macro addruart, rx, tmp, tmp2 - ldr \rx, =(CONFIG_DEBUG_UART_PHYS) - - /* - * enable TX. The driver might disable it to save energy. We - * don't care about disabling at the end as during debug power - * consumption isn't that important. - */ - ldr \tmp, =(UARTn_CMD_TXEN) - str \tmp, [\rx, #UARTn_CMD] - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #UARTn_TXDATA] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UARTn_STATUS] - tst \rd, #UARTn_STATUS_TXBL - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, UARTn_STATUS] - tst \rd, #UARTn_STATUS_TXC - bne 1001b - .endm diff --git a/arch/arm/include/debug/exynos.S b/arch/arm/include/debug/exynos.S deleted file mode 100644 index 74b56769f9cb3b6faf3f58940dbb3dbe2b6f40d9..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/exynos.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com - */ - -/* pull in the relevant register and map files. */ - -#define S3C_ADDR_BASE 0xF6000000 -#define S3C_VA_UART S3C_ADDR_BASE + 0x01000000 -#define EXYNOS4_PA_UART 0x13800000 -#define EXYNOS5_PA_UART 0x12C00000 - - /* note, for the boot process to work we have to keep the UART - * virtual address aligned to an 1MiB boundary for the L1 - * mapping the head code makes. We keep the UART virtual address - * aligned and add in the offset when we load the value here. 
- */ - - .macro addruart, rp, rv, tmp - mrc p15, 0, \tmp, c0, c0, 0 - and \tmp, \tmp, #0xf0 - teq \tmp, #0xf0 @@ A15 - beq 100f - mrc p15, 0, \tmp, c0, c0, 5 - and \tmp, \tmp, #0xf00 - teq \tmp, #0x100 @@ A15 + A7 but boot to A7 -100: ldreq \rp, =EXYNOS5_PA_UART - movne \rp, #EXYNOS4_PA_UART @@ EXYNOS4 - ldr \rv, =S3C_VA_UART -#if CONFIG_DEBUG_S3C_UART != 0 - add \rp, \rp, #(0x10000 * CONFIG_DEBUG_S3C_UART) - add \rv, \rv, #(0x10000 * CONFIG_DEBUG_S3C_UART) -#endif - .endm - -#define fifo_full fifo_full_s5pv210 -#define fifo_level fifo_level_s5pv210 - -#include diff --git a/arch/arm/include/debug/icedcc.S b/arch/arm/include/debug/icedcc.S deleted file mode 100644 index 74a0dd036a175edac05d931db5f132e9e18f5b53..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/icedcc.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/icedcc.S - * - * Copyright (C) 1994-1999 Russell King - */ - - @@ debug using ARM EmbeddedICE DCC channel - - .macro addruart, rp, rv, tmp - .endm - -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c0, c5, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - bne 1001b -1002: - .endm - -#elif defined(CONFIG_CPU_XSCALE) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c8, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x10000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - bne 1001b -1002: - .endm - -#else - - .macro senduart, rd, rx - mcr p14, 0, \rd, c1, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - beq 1001b - - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - bne 1001b -1002: - .endm - -#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/include/debug/imx.S b/arch/arm/include/debug/imx.S deleted file mode 100644 index 1c1b9d1da4c8f5e28ece495f8c1d1c6bcce8807b..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/imx.S +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-imx/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - */ - -#include -#include "imx-uart.h" - -/* - * FIXME: This is a copy of IMX_IO_P2V in hardware.h, and needs to - * stay sync with that. It's hard to maintain, and should be fixed - * globally for multi-platform build to use a fixed virtual address - * for low-level debug uart port across platforms. 
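
The IMX_IO_P2V() macro defined just below folds several physical peripheral windows into one virtual region by masking and shifting address bits. A C transcription plus one worked value, using an arbitrary sample address:

#include <stdint.h>
#include <stdio.h>

static uint32_t imx_io_p2v(uint32_t x)	/* same arithmetic as the macro */
{
	return ((x & 0x80000000) >> 7) |
	       (0xf4000000 +
		((x & 0x50000000) >> 6) +
		((x & 0x0b000000) >> 4) +
		(x & 0x000fffff));
}

int main(void)
{
	printf("0x%08x\n", imx_io_p2v(0x02020000));	/* prints 0xf4220000 */
	return 0;
}
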
- */ -#define IMX_IO_P2V(x) ( \ - (((x) & 0x80000000) >> 7) | \ - (0xf4000000 + \ - (((x) & 0x50000000) >> 6) + \ - (((x) & 0x0b000000) >> 4) + \ - (((x) & 0x000fffff)))) - -#define UART_VADDR IMX_IO_P2V(UART_PADDR) - - .macro addruart, rp, rv, tmp - ldr \rp, =UART_PADDR @ physical - ldr \rv, =UART_VADDR @ virtual - .endm - - .macro senduart,rd,rx - ARM_BE8(rev \rd, \rd) - str \rd, [\rx, #0x40] @ TXDATA - .endm - - .macro waituart,rd,rx - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #0x98] @ SR2 - ARM_BE8(rev \rd, \rd) - tst \rd, #1 << 3 @ TXDC - beq 1002b @ wait until transmit done - .endm diff --git a/arch/arm/include/debug/meson.S b/arch/arm/include/debug/meson.S deleted file mode 100644 index 1e501a0054aea10ee77829ee6c9c32fcd6a64035..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/meson.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014 Carlo Caione - * Carlo Caione - */ - -#define MESON_AO_UART_WFIFO 0x0 -#define MESON_AO_UART_STATUS 0xc - -#define MESON_AO_UART_TX_FIFO_EMPTY (1 << 22) -#define MESON_AO_UART_TX_FIFO_FULL (1 << 21) - - .macro addruart, rp, rv, tmp - ldr \rp, =(CONFIG_DEBUG_UART_PHYS) @ physical - ldr \rv, =(CONFIG_DEBUG_UART_VIRT) @ virtual - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #MESON_AO_UART_WFIFO] - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #MESON_AO_UART_STATUS] - tst \rd, #MESON_AO_UART_TX_FIFO_EMPTY - beq 1002b - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #MESON_AO_UART_STATUS] - tst \rd, #MESON_AO_UART_TX_FIFO_FULL - bne 1001b - .endm diff --git a/arch/arm/include/debug/msm.S b/arch/arm/include/debug/msm.S deleted file mode 100644 index 9405b71461daf1aebe3968835a6c2682159d86f3..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/msm.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * - * Copyright (C) 2007 Google, Inc. - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * Author: Brian Swetland - */ - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm - - .macro senduart, rd, rx -ARM_BE8(rev \rd, \rd ) - @ Write the 1 character to UARTDM_TF - str \rd, [\rx, #0x70] - .endm - - .macro waituart, rd, rx - @ check for TX_EMT in UARTDM_SR - ldr \rd, [\rx, #0x08] -ARM_BE8(rev \rd, \rd ) - tst \rd, #0x08 - bne 1002f - @ wait for TXREADY in UARTDM_ISR -1001: ldr \rd, [\rx, #0x14] -ARM_BE8(rev \rd, \rd ) - tst \rd, #0x80 - beq 1001b -1002: - @ Clear TX_READY by writing to the UARTDM_CR register - mov \rd, #0x300 -ARM_BE8(rev \rd, \rd ) - str \rd, [\rx, #0x10] - @ Write 0x1 to NCF register - mov \rd, #0x1 -ARM_BE8(rev \rd, \rd ) - str \rd, [\rx, #0x40] - @ UARTDM reg. 
Read to induce delay - ldr \rd, [\rx, #0x08] - .endm - - .macro busyuart, rd, rx - .endm diff --git a/arch/arm/include/debug/omap2plus.S b/arch/arm/include/debug/omap2plus.S deleted file mode 100644 index b5696a33ba0f524b26dbd41b4cbc18064b01e969..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/omap2plus.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -/* External port on Zoom2/3 */ -#define ZOOM_UART_BASE 0x10000000 -#define ZOOM_UART_VIRT 0xfa400000 - -#define OMAP_PORT_SHIFT 2 -#define ZOOM_PORT_SHIFT 1 - -#define UART_OFFSET(addr) ((addr) & 0x00ffffff) - - .pushsection .data - .align 2 -omap_uart_phys: .word 0 -omap_uart_virt: .word 0 -omap_uart_lsr: .word 0 - .popsection - - .macro addruart, rp, rv, tmp - - /* Use omap_uart_phys/virt if already configured */ -10: adr \rp, 99f @ get effective addr of 99f - ldr \rv, [\rp] @ get absolute addr of 99f - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ abs addr of omap_uart_phys - sub \tmp, \rp, \rv @ make it effective - ldr \rp, [\tmp, #0] @ omap_uart_phys - ldr \rv, [\tmp, #4] @ omap_uart_virt - cmp \rp, #0 @ is port configured? - cmpne \rv, #0 - bne 100f @ already configured - - /* Configure the UART offset from the phys/virt base */ -#ifdef CONFIG_DEBUG_ZOOM_UART - ldr \rp, =ZOOM_UART_BASE - str \rp, [\tmp, #0] @ omap_uart_phys - ldr \rp, =ZOOM_UART_VIRT - str \rp, [\tmp, #4] @ omap_uart_virt - mov \rp, #(UART_LSR << ZOOM_PORT_SHIFT) - str \rp, [\tmp, #8] @ omap_uart_lsr -#endif - b 10b - - .align -99: .word . - .word omap_uart_phys - .ltorg - -100: /* Pass the UART_LSR reg address */ - ldr \tmp, [\tmp, #8] @ omap_uart_lsr - add \rp, \rp, \tmp - add \rv, \rv, \tmp - .endm - - .macro senduart,rd,rx - orr \rd, \rd, \rx, lsl #24 @ preserve LSR reg offset - bic \rx, \rx, #0xff @ get base (THR) reg address - strb \rd, [\rx] @ send lower byte of rd - orr \rx, \rx, \rd, lsr #24 @ restore original rx (LSR) - bic \rd, \rd, #(0xff << 24) @ restore original rd - .endm - - .macro busyuart,rd,rx -1001: ldrb \rd, [\rx] @ rx contains UART_LSR address - and \rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - teq \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - bne 1001b - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/palmchip.S b/arch/arm/include/debug/palmchip.S deleted file mode 100644 index aed59332e487bc2c1d475a7cb687e036d9f9abbb..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/palmchip.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - -#undef UART_TX -#undef UART_LSR -#undef UART_MSR - -#define UART_TX 1 -#define UART_LSR 7 -#define UART_MSR 8 - -#include diff --git a/arch/arm/include/debug/pl01x.S b/arch/arm/include/debug/pl01x.S deleted file mode 100644 index a2a553afe7b89e4409c7ba5b583d8011bb0ff0c3..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/pl01x.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/include/debug/pl01x.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ -#include - -#ifdef CONFIG_DEBUG_ZTE_ZX -#undef UART01x_DR -#undef UART01x_FR -#define UART01x_DR 0x04 -#define UART01x_FR 0x14 -#endif - -#ifdef CONFIG_DEBUG_UART_PHYS - .macro addruart, rp, rv, tmp - ldr \rp, 
=CONFIG_DEBUG_UART_PHYS - ldr \rv, =CONFIG_DEBUG_UART_VIRT - .endm -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, #UART01x_DR] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - ARM_BE8( rev \rd, \rd ) - tst \rd, #UART01x_FR_TXFF - bne 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #UART01x_FR] - ARM_BE8( rev \rd, \rd ) - tst \rd, #UART01x_FR_BUSY - bne 1001b - .endm diff --git a/arch/arm/include/debug/renesas-scif.S b/arch/arm/include/debug/renesas-scif.S deleted file mode 100644 index 25f06663a9a4e2c1b2f569b8bb10b5c503a78e83..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/renesas-scif.S +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Renesas SCIF(A) debugging macro include header - * - * Based on r8a7790.S - * - * Copyright (C) 2012-2013 Renesas Electronics Corporation - * Copyright (C) 1994-1999 Russell King - */ - -#define SCIF_PHYS CONFIG_DEBUG_UART_PHYS -#define SCIF_VIRT ((SCIF_PHYS & 0x00ffffff) | 0xfd000000) - -#if defined(CONFIG_DEBUG_R7S9210_SCIF2) || defined(CONFIG_DEBUG_R7S9210_SCIF4) -/* RZ/A2 SCIFA */ -#define FTDR 0x06 -#define FSR 0x08 -#elif CONFIG_DEBUG_UART_PHYS < 0xe6e00000 -/* SCIFA */ -#define FTDR 0x20 -#define FSR 0x14 -#else -/* SCIF */ -#define FTDR 0x0c -#define FSR 0x10 -#endif - -#define TDFE (1 << 5) -#define TEND (1 << 6) - - .macro addruart, rp, rv, tmp - ldr \rp, =SCIF_PHYS - ldr \rv, =SCIF_VIRT - .endm - - .macro waituart, rd, rx -1001: ldrh \rd, [\rx, #FSR] - tst \rd, #TDFE - beq 1001b - .endm - - .macro senduart, rd, rx - strb \rd, [\rx, #FTDR] - ldrh \rd, [\rx, #FSR] - bic \rd, \rd, #TEND - strh \rd, [\rx, #FSR] - .endm - - .macro busyuart, rd, rx -1001: ldrh \rd, [\rx, #FSR] - tst \rd, #TEND - beq 1001b - .endm diff --git a/arch/arm/include/debug/s3c24xx.S b/arch/arm/include/debug/s3c24xx.S deleted file mode 100644 index af873b5266778be070b0fde8cf3cbfd34debdad1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/s3c24xx.S +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-s3c2410/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Copyright (C) 2005 Simtec Electronics - * - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#include - -#define S3C2410_UART1_OFF (0x4000) - - .macro addruart, rp, rv, tmp - ldr \rp, = CONFIG_DEBUG_UART_PHYS - ldr \rv, = CONFIG_DEBUG_UART_VIRT - .endm - - .macro fifo_full_s3c2410 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] - tst \rd, #S3C2410_UFSTAT_TXFULL - .endm - - .macro fifo_level_s3c2410 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] - and \rd, \rd, #S3C2410_UFSTAT_TXMASK - .endm - -/* Select the correct implementation depending on the configuration. The - * S3C2440 will get selected by default, as these are the most widely - * used variants of these -*/ - -#if defined(CONFIG_DEBUG_S3C2410_UART) -#define fifo_full fifo_full_s3c2410 -#define fifo_level fifo_level_s3c2410 -#endif - -/* include the reset of the code which will do the work */ - -#include diff --git a/arch/arm/include/debug/s5pv210.S b/arch/arm/include/debug/s5pv210.S deleted file mode 100644 index 820a1cfb059527c9346037d54f23df4a5569acfe..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/s5pv210.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com -*/ - -/* pull in the relevant register and map files. 
*/ - -#define S3C_ADDR_BASE 0xF6000000 -#define S3C_VA_UART S3C_ADDR_BASE + 0x01000000 -#define S5PV210_PA_UART 0xe2900000 - - /* note, for the boot process to work we have to keep the UART - * virtual address aligned to a 1MiB boundary for the L1 - * mapping the head code makes. We keep the UART virtual address - * aligned and add in the offset when we load the value here. - */ - - .macro addruart, rp, rv, tmp - ldr \rp, =S5PV210_PA_UART - ldr \rv, =S3C_VA_UART -#if CONFIG_DEBUG_S3C_UART != 0 - add \rp, \rp, #(0x400 * CONFIG_DEBUG_S3C_UART) - add \rv, \rv, #(0x400 * CONFIG_DEBUG_S3C_UART) -#endif - .endm - -#define fifo_full fifo_full_s5pv210 -#define fifo_level fifo_level_s5pv210 - -#include <debug/samsung.S> diff --git a/arch/arm/include/debug/sa1100.S b/arch/arm/include/debug/sa1100.S deleted file mode 100644 index 6109e6058e5b3eb80cd9e58a8e871e760cc4f245..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sa1100.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/include/debug/sa1100.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks -*/ - -#define UTCR3 0x0c -#define UTDR 0x14 -#define UTSR1 0x20 -#define UTCR3_TXE 0x00000002 /* Transmit Enable */ -#define UTSR1_TBY 0x00000001 /* Transmitter BusY (read) */ -#define UTSR1_TNF 0x00000004 /* Transmit FIFO Not Full (read) */ - - .macro addruart, rp, rv, tmp - mrc p15, 0, \rp, c1, c0 - tst \rp, #1 @ MMU enabled? - moveq \rp, #0x80000000 @ physical base address - movne \rp, #0xf8000000 @ virtual address - - @ We probe for the active serial port here, coherently with - @ the comment in arch/arm/mach-sa1100/include/mach/uncompress.h. - @ We assume r1 can be clobbered. - - @ see if Ser3 is active - add \rp, \rp, #0x00050000 - ldr \rv, [\rp, #UTCR3] - tst \rv, #UTCR3_TXE - - @ if Ser3 is inactive, then try Ser1 - addeq \rp, \rp, #(0x00010000 - 0x00050000) - ldreq \rv, [\rp, #UTCR3] - tsteq \rv, #UTCR3_TXE - - @ if Ser1 is inactive, then try Ser2 - addeq \rp, \rp, #(0x00030000 - 0x00010000) - ldreq \rv, [\rp, #UTCR3] - tsteq \rv, #UTCR3_TXE - - @ clear top bits, and generate both phys and virt addresses - lsl \rp, \rp, #8 - lsr \rp, \rp, #8 - orr \rv, \rp, #0xf8000000 @ virtual - orr \rp, \rp, #0x80000000 @ physical - - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #UTDR] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UTSR1] - tst \rd, #UTSR1_TNF - beq 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #UTSR1] - tst \rd, #UTSR1_TBY - bne 1001b - .endm diff --git a/arch/arm/include/debug/samsung.S b/arch/arm/include/debug/samsung.S deleted file mode 100644 index 69201d7fb48f6443d0ec8e5bb474764d947275b0..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/samsung.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright 2005, 2007 Simtec Electronics - * http://armlinux.simtec.co.uk/ - * Ben Dooks - */ - -#include <linux/serial_s3c.h> - -/* The S5PV210/S5PC110 implementations are as below.
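The addruart macro above reduces to simple address arithmetic: both the physical and the virtual base advance by 0x400 per UART, while the virtual base itself stays 1MiB aligned so the early section mapping made by the head code still covers the chosen port. A C sketch of that computation, with port standing in for CONFIG_DEBUG_S3C_UART:

#include <stdint.h>

#define S5PV210_PA_UART 0xe2900000u
#define S3C_VA_UART     (0xF6000000u + 0x01000000u)

struct uart_addr { uint32_t phys, virt; };

static struct uart_addr s5pv210_debug_uart(unsigned int port)
{
        /* each UART register block is 0x400 bytes wide */
        struct uart_addr a = {
                .phys = S5PV210_PA_UART + 0x400u * port,
                .virt = S3C_VA_UART + 0x400u * port,
        };
        return a;
}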
*/ - - .macro fifo_level_s5pv210 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - and \rd, \rd, #S5PV210_UFSTAT_TXMASK - .endm - - .macro fifo_full_s5pv210 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S5PV210_UFSTAT_TXFULL - .endm - -/* The S3C2440 implementations are used by default as they are the - * most widely re-used */ - - .macro fifo_level_s3c2440 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - and \rd, \rd, #S3C2440_UFSTAT_TXMASK - .endm - -#ifndef fifo_level -#define fifo_level fifo_level_s3c2440 -#endif - - .macro fifo_full_s3c2440 rd, rx - ldr \rd, [\rx, # S3C2410_UFSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2440_UFSTAT_TXFULL - .endm - -#ifndef fifo_full -#define fifo_full fifo_full_s3c2440 -#endif - - .macro senduart,rd,rx - strb \rd, [\rx, # S3C2410_UTXH] - .endm - - .macro busyuart, rd, rx - ldr \rd, [\rx, # S3C2410_UFCON] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UFCON_FIFOMODE @ fifo enabled? - beq 1001f @ - @ FIFO enabled... -1003: - fifo_full \rd, \rx - bne 1003b - b 1002f - -1001: - @ busy waiting for non fifo - ldr \rd, [\rx, # S3C2410_UTRSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UTRSTAT_TXFE - beq 1001b - -1002: @ exit busyuart - .endm - - .macro waituart,rd,rx - ldr \rd, [\rx, # S3C2410_UFCON] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UFCON_FIFOMODE @ fifo enabled? - beq 1001f @ - @ FIFO enabled... -1003: - fifo_level \rd, \rx - teq \rd, #0 - bne 1003b - b 1002f -1001: - @ idle waiting for non fifo - ldr \rd, [\rx, # S3C2410_UTRSTAT] -ARM_BE8(rev \rd, \rd) - tst \rd, #S3C2410_UTRSTAT_TXFE - beq 1001b - -1002: @ exit busyuart - .endm diff --git a/arch/arm/include/debug/sirf.S b/arch/arm/include/debug/sirf.S deleted file mode 100644 index e73e4de0a015312fe01d344972a7998b8c6bd4d8..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sirf.S +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-prima2/include/mach/debug-macro.S - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#define SIRF_LLUART_TXFIFO_STATUS 0x0114 -#define SIRF_LLUART_TXFIFO_DATA 0x0118 - -#define SIRF_LLUART_TXFIFO_FULL (1 << 5) - -#ifdef CONFIG_DEBUG_SIRFATLAS7_UART0 -#define SIRF_LLUART_TXFIFO_EMPTY (1 << 8) -#else -#define SIRF_LLUART_TXFIFO_EMPTY (1 << 6) -#endif - - - .macro addruart, rp, rv, tmp - ldr \rp, =CONFIG_DEBUG_UART_PHYS @ physical - ldr \rv, =CONFIG_DEBUG_UART_VIRT @ virtual - .endm - - .macro senduart,rd,rx - str \rd, [\rx, #SIRF_LLUART_TXFIFO_DATA] - .endm - - .macro busyuart,rd,rx - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #SIRF_LLUART_TXFIFO_STATUS] - tst \rd, #SIRF_LLUART_TXFIFO_EMPTY - beq 1001b - .endm - diff --git a/arch/arm/include/debug/sti.S b/arch/arm/include/debug/sti.S deleted file mode 100644 index 6b42c91f217d4019f392eac6d4eab53f210ea259..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/sti.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/debug/sti.S - * - * Debugging macro include header - * Copyright (C) 2013 STMicroelectronics (R&D) Limited. 
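The Samsung busyuart/waituart macros above branch on whether the UART FIFO is enabled: with the FIFO on they poll the FIFO status register, otherwise they poll the transmit-empty bit in UTRSTAT. The same decision in C; the accessors and the bit values here are assumed placeholders (the real ones come from the serial_s3c.h register definitions):

#include <stdint.h>

extern uint32_t rd_ufcon(void);      /* UFCON: FIFO control */
extern uint32_t rd_ufstat(void);     /* UFSTAT: FIFO status */
extern uint32_t rd_utrstat(void);    /* UTRSTAT: TX/RX status */

#define UFCON_FIFOMODE (1u << 0)     /* assumed bit positions */
#define UFSTAT_TXFULL  (1u << 14)
#define UTRSTAT_TXFE   (1u << 1)

/* busyuart logic: wait until another byte can be accepted */
static void wait_tx_ready(void)
{
        if (rd_ufcon() & UFCON_FIFOMODE) {
                while (rd_ufstat() & UFSTAT_TXFULL)     /* FIFO path */
                        ;
        } else {
                while (!(rd_utrstat() & UTRSTAT_TXFE))  /* non-FIFO path */
                        ;
        }
}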
- */ - -#define STIH41X_COMMS_BASE 0xfed00000 -#define STIH41X_ASC2_BASE (STIH41X_COMMS_BASE+0x32000) - -#define STIH41X_SBC_LPM_BASE 0xfe400000 -#define STIH41X_SBC_COMMS_BASE (STIH41X_SBC_LPM_BASE + 0x100000) -#define STIH41X_SBC_ASC1_BASE (STIH41X_SBC_COMMS_BASE + 0x31000) - - -#define VIRT_ADDRESS(x) (x - 0x1000000) - -#if IS_ENABLED(CONFIG_STIH41X_DEBUG_ASC2) -#define DEBUG_LL_UART_BASE STIH41X_ASC2_BASE -#endif - -#if IS_ENABLED(CONFIG_STIH41X_DEBUG_SBC_ASC1) -#define DEBUG_LL_UART_BASE STIH41X_SBC_ASC1_BASE -#endif - -#ifndef DEBUG_LL_UART_BASE -#error "DEBUG UART is not Configured" -#endif - -#define ASC_TX_BUF_OFF 0x04 -#define ASC_CTRL_OFF 0x0c -#define ASC_STA_OFF 0x14 - -#define ASC_STA_TX_FULL (1<<9) -#define ASC_STA_TX_EMPTY (1<<1) - - - .macro addruart, rp, rv, tmp - ldr \rp, =DEBUG_LL_UART_BASE @ physical base - ldr \rv, =VIRT_ADDRESS(DEBUG_LL_UART_BASE) @ virt base - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #ASC_TX_BUF_OFF] - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #ASC_STA_OFF] - tst \rd, #ASC_STA_TX_FULL - bne 1001b - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #ASC_STA_OFF] - tst \rd, #ASC_STA_TX_EMPTY - beq 1001b - .endm diff --git a/arch/arm/include/debug/stm32.S b/arch/arm/include/debug/stm32.S deleted file mode 100644 index 1abb32f685fdbb8327d3392ccdd754fb00e35312..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/stm32.S +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) STMicroelectronics SA 2017 - All Rights Reserved - * Author: Gerald Baeza for STMicroelectronics. - */ - -#define STM32_UART_BASE 0x40011000 /* USART1 */ - -#ifdef CONFIG_STM32F4_DEBUG_UART -#define STM32_USART_SR_OFF 0x00 -#define STM32_USART_TDR_OFF 0x04 -#endif - -#ifdef CONFIG_STM32F7_DEBUG_UART -#define STM32_USART_SR_OFF 0x1C -#define STM32_USART_TDR_OFF 0x28 -#endif - -#define STM32_USART_TC (1 << 6) /* Tx complete */ -#define STM32_USART_TXE (1 << 7) /* Tx data reg empty */ - -.macro addruart, rp, rv, tmp - ldr \rp, =STM32_UART_BASE @ physical base - ldr \rv, =STM32_UART_BASE @ virt base /* NoMMU */ -.endm - -.macro senduart,rd,rx - strb \rd, [\rx, #STM32_USART_TDR_OFF] -.endm - -.macro waituart,rd,rx -1001: ldr \rd, [\rx, #(STM32_USART_SR_OFF)] @ Read Status Register - tst \rd, #STM32_USART_TXE @ TXE = 1 = tx empty - beq 1001b -.endm - -.macro busyuart,rd,rx -1001: ldr \rd, [\rx, #(STM32_USART_SR_OFF)] @ Read Status Register - tst \rd, #STM32_USART_TC @ TC = 1 = tx complete - beq 1001b -.endm diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S deleted file mode 100644 index 2148d0f8859194f1ee6928dfb8d995a2899bf134..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/tegra.S +++ /dev/null @@ -1,217 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2010,2011 Google, Inc. - * Copyright (C) 2011-2012 NVIDIA CORPORATION. All Rights Reserved. 
- * - * Author: - * Colin Cross - * Erik Gilling - * Doug Anderson - * Stephen Warren - * - * Portions based on mach-omap2's debug-macro.S - * Copyright (C) 1994-1999 Russell King - */ - -#include - -#define UART_SHIFT 2 - -/* Physical addresses */ -#define TEGRA_CLK_RESET_BASE 0x60006000 -#define TEGRA_APB_MISC_BASE 0x70000000 -#define TEGRA_UARTA_BASE 0x70006000 -#define TEGRA_UARTB_BASE 0x70006040 -#define TEGRA_UARTC_BASE 0x70006200 -#define TEGRA_UARTD_BASE 0x70006300 -#define TEGRA_UARTE_BASE 0x70006400 -#define TEGRA_PMC_BASE 0x7000e400 - -#define TEGRA_CLK_RST_DEVICES_L (TEGRA_CLK_RESET_BASE + 0x04) -#define TEGRA_CLK_RST_DEVICES_H (TEGRA_CLK_RESET_BASE + 0x08) -#define TEGRA_CLK_RST_DEVICES_U (TEGRA_CLK_RESET_BASE + 0x0c) -#define TEGRA_CLK_OUT_ENB_L (TEGRA_CLK_RESET_BASE + 0x10) -#define TEGRA_CLK_OUT_ENB_H (TEGRA_CLK_RESET_BASE + 0x14) -#define TEGRA_CLK_OUT_ENB_U (TEGRA_CLK_RESET_BASE + 0x18) -#define TEGRA_PMC_SCRATCH20 (TEGRA_PMC_BASE + 0xa0) -#define TEGRA_APB_MISC_GP_HIDREV (TEGRA_APB_MISC_BASE + 0x804) - -/* - * Must be section-aligned since a section mapping is used early on. - * Must not overlap with regions in mach-tegra/io.c:tegra_io_desc[]. - */ -#define UART_VIRTUAL_BASE 0xfe800000 - -#define checkuart(rp, rv, lhu, bit, uart) \ - /* Load address of CLK_RST register */ \ - ldr rp, =TEGRA_CLK_RST_DEVICES_##lhu ; \ - /* Load value from CLK_RST register */ \ - ldr rp, [rp, #0] ; \ - /* Test UART's reset bit */ \ - tst rp, #(1 << bit) ; \ - /* If set, can't use UART; jump to save no UART */ \ - bne 90f ; \ - /* Load address of CLK_OUT_ENB register */ \ - ldr rp, =TEGRA_CLK_OUT_ENB_##lhu ; \ - /* Load value from CLK_OUT_ENB register */ \ - ldr rp, [rp, #0] ; \ - /* Test UART's clock enable bit */ \ - tst rp, #(1 << bit) ; \ - /* If clear, can't use UART; jump to save no UART */ \ - beq 90f ; \ - /* Passed all tests, load address of UART registers */ \ - ldr rp, =TEGRA_UART##uart##_BASE ; \ - /* Jump to save UART address */ \ - b 91f - - .macro addruart, rp, rv, tmp - adr \rp, 99f @ actual addr of 99f - ldr \rv, [\rp] @ linked addr is stored there - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ linked tegra_uart_config - sub \tmp, \rp, \rv @ actual tegra_uart_config - ldr \rp, [\tmp] @ Load tegra_uart_config - cmp \rp, #1 @ needs initialization? - bne 100f @ no; go load the addresses - mov \rv, #0 @ yes; record init is done - str \rv, [\tmp] - -#ifdef CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA - /* Check ODMDATA */ -10: ldr \rp, =TEGRA_PMC_SCRATCH20 - ldr \rp, [\rp, #0] @ Load PMC_SCRATCH20 - lsr \rv, \rp, #18 @ 19:18 are console type - and \rv, \rv, #3 - cmp \rv, #2 @ 2 and 3 mean DCC, UART - beq 11f @ some boards swap the meaning - cmp \rv, #3 @ so accept either - bne 90f -11: lsr \rv, \rp, #15 @ 17:15 are UART ID - and \rv, #7 - cmp \rv, #0 @ UART 0? - beq 20f - cmp \rv, #1 @ UART 1? - beq 21f - cmp \rv, #2 @ UART 2? - beq 22f - cmp \rv, #3 @ UART 3? - beq 23f - cmp \rv, #4 @ UART 4? 
- beq 24f - b 90f @ invalid -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTA) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART A validity */ -20: checkuart(\rp, \rv, L, 6, A) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTB) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART B validity */ -21: checkuart(\rp, \rv, L, 7, B) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTC) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART C validity */ -22: checkuart(\rp, \rv, H, 23, C) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTD) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART D validity */ -23: checkuart(\rp, \rv, U, 1, D) -#endif - -#if defined(CONFIG_TEGRA_DEBUG_UARTE) || \ - defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) - /* Check UART E validity */ -24: - checkuart(\rp, \rv, U, 2, E) -#endif - - /* No valid UART found */ -90: mov \rp, #0 - /* fall through */ - - /* Record whichever UART we chose */ -91: str \rp, [\tmp, #4] @ Store in tegra_uart_phys - cmp \rp, #0 @ Valid UART address? - bne 92f @ Yes, go process it - str \rp, [\tmp, #8] @ Store 0 in tegra_uart_virt - b 100f @ Done -92: and \rv, \rp, #0xffffff @ offset within 1MB section - add \rv, \rv, #UART_VIRTUAL_BASE - str \rv, [\tmp, #8] @ Store in tegra_uart_virt - b 100f - - .align -99: .word . - .word tegra_uart_config - .ltorg - - /* Load previously selected UART address */ -100: ldr \rp, [\tmp, #4] @ Load tegra_uart_phys - ldr \rv, [\tmp, #8] @ Load tegra_uart_virt - .endm - -/* - * Code below is swiped from , but add an extra - * check to make sure that the UART address is actually valid. - */ - - .macro senduart, rd, rx - cmp \rx, #0 - strbne \rd, [\rx, #UART_TX << UART_SHIFT] -1001: - .endm - - .macro busyuart, rd, rx - cmp \rx, #0 - beq 1002f -1001: ldrb \rd, [\rx, #UART_LSR << UART_SHIFT] - and \rd, \rd, #UART_LSR_THRE - teq \rd, #UART_LSR_THRE - bne 1001b -1002: - .endm - - .macro waituart, rd, rx -#ifdef FLOW_CONTROL - cmp \rx, #0 - beq 1002f -1001: ldrb \rd, [\rx, #UART_MSR << UART_SHIFT] - tst \rd, #UART_MSR_CTS - beq 1001b -1002: -#endif - .endm - -/* - * Storage for the state maintained by the macros above. - * - * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. - * That's because this header is included from multiple files, and we only - * want a single copy of the data. In particular, the UART probing code above - * assumes it's running using physical addresses. This is true when this file - * is included from head.o, but not when included from debug.o. So we need - * to share the probe results between the two copies, rather than having - * to re-run the probing again later. - * - * In the decompressor, we put the symbol/storage right here, since common.c - * isn't included in the decompressor build. This symbol gets put in .text - * even though it's really data, since .data is discarded from the - * decompressor. Luckily, .text is writeable in the decompressor, unless - * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. 
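The ODMDATA probe above packs two fields into PMC_SCRATCH20: bits 19:18 give the console type (2 and 3 are both accepted because some boards swap the DCC and UART encodings) and bits 17:15 give the UART index. Decoded in C for clarity:

#include <stdint.h>

/* Returns 0..4 for UART A..E, or -1 if ODMDATA selects no usable UART. */
static int tegra_odmdata_uart(uint32_t scratch20)
{
        uint32_t console = (scratch20 >> 18) & 0x3;
        uint32_t uart    = (scratch20 >> 15) & 0x7;

        if (console != 2 && console != 3)   /* not a DCC/UART console */
                return -1;
        if (uart > 4)                       /* only UART A..E exist */
                return -1;
        return (int)uart;
}

Each candidate is then still vetted by checkuart, which refuses any port whose clock is gated or which is held in reset.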
- */ -#if defined(ZIMAGE) -tegra_uart_config: - /* Debug UART initialization required */ - .word 1 - /* Debug UART physical address */ - .word 0 - /* Debug UART virtual address */ - .word 0 -#endif diff --git a/arch/arm/include/debug/ux500.S b/arch/arm/include/debug/ux500.S deleted file mode 100644 index c516900947bb4e39dc7d28c88e1a93d2d2f37ec1..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/ux500.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2009 ST-Ericsson - */ - - -#if CONFIG_UX500_DEBUG_UART > 2 -#error Invalid Ux500 debug UART -#endif - -/* - * DEBUG_LL only works if only one SOC is built in. We don't use #else below - * in order to get "__UX500_UART redefined" warnings if more than one SOC is - * built, so that there's some hint during the build that something is wrong. - */ - -#ifdef CONFIG_UX500_SOC_DB8500 -#define U8500_UART0_PHYS_BASE (0x80120000) -#define U8500_UART1_PHYS_BASE (0x80121000) -#define U8500_UART2_PHYS_BASE (0x80007000) -#define __UX500_PHYS_UART(n) U8500_UART##n##_PHYS_BASE -#endif - -#if !defined(__UX500_PHYS_UART) -#error Unknown SOC -#endif - -#define UX500_PHYS_UART(n) __UX500_PHYS_UART(n) -#define UART_PHYS_BASE UX500_PHYS_UART(CONFIG_UX500_DEBUG_UART) -#define UART_VIRT_BASE (0xfff07000) - - .macro addruart, rp, rv, tmp - ldr \rp, =UART_PHYS_BASE @ no, physical address - ldr \rv, =UART_VIRT_BASE @ yes, virtual address - .endm - -#include <debug/pl01x.S> diff --git a/arch/arm/include/debug/vexpress.S b/arch/arm/include/debug/vexpress.S deleted file mode 100644 index ccb22e9a86a35a4123fca489bbc2db1c1de7b8df..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vexpress.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* arch/arm/mach-realview/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - */ - -#define DEBUG_LL_PHYS_BASE 0x10000000 -#define DEBUG_LL_UART_OFFSET 0x00009000 - -#define DEBUG_LL_PHYS_BASE_RS1 0x1c000000 -#define DEBUG_LL_UART_OFFSET_RS1 0x00090000 - -#define DEBUG_LL_UART_PHYS_CRX 0xb0090000 - -#define DEBUG_LL_VIRT_BASE 0xf8000000 - -#if defined(CONFIG_DEBUG_VEXPRESS_UART0_DETECT) - - .macro addruart,rp,rv,tmp - .arch armv7-a - - @ Make an educated guess regarding the memory map: - @ - the original A9 core tile (based on ARM Cortex-A9 r0p1) - @ should use UART at 0x10009000 - @ - all other (RS1 compliant) tiles use UART mapped - @ at 0x1c090000 - mrc p15, 0, \rp, c0, c0, 0 - movw \rv, #0xc091 - movt \rv, #0x410f - cmp \rp, \rv - - @ Original memory map - moveq \rp, #DEBUG_LL_UART_OFFSET - orreq \rv, \rp, #DEBUG_LL_VIRT_BASE - orreq \rp, \rp, #DEBUG_LL_PHYS_BASE - - @ RS1 memory map - movne \rp, #DEBUG_LL_UART_OFFSET_RS1 - orrne \rv, \rp, #DEBUG_LL_VIRT_BASE - orrne \rp, \rp, #DEBUG_LL_PHYS_BASE_RS1 - - .endm - -#include <debug/pl01x.S> -#endif diff --git a/arch/arm/include/debug/vf.S b/arch/arm/include/debug/vf.S deleted file mode 100644 index 854d9bd8277019e50de31cc6823278ef343afaf5..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vf.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2013 Freescale Semiconductor, Inc.
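The vexpress detection above is a one-comparison decision: read the MIDR and, if it identifies the original Cortex-A9 r0p1 core tile (0x410fc091), use the legacy memory map, otherwise assume an RS1-compliant tile. In C, with read_midr() standing in for the MRC access:

#include <stdint.h>

extern uint32_t read_midr(void);   /* placeholder for mrc p15,0,...,c0,c0,0 */

static uint32_t vexpress_uart_phys(void)
{
        if (read_midr() == 0x410fc091u)
                return 0x10000000u + 0x00009000u;  /* original A9 tile */
        return 0x1c000000u + 0x00090000u;          /* RS1 memory map */
}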
- */ - -#define VF_UART0_BASE_ADDR 0x40027000 -#define VF_UART1_BASE_ADDR 0x40028000 -#define VF_UART2_BASE_ADDR 0x40029000 -#define VF_UART3_BASE_ADDR 0x4002a000 -#define VF_UART_BASE_ADDR(n) VF_UART##n##_BASE_ADDR -#define VF_UART_BASE(n) VF_UART_BASE_ADDR(n) -#define VF_UART_PHYSICAL_BASE VF_UART_BASE(CONFIG_DEBUG_VF_UART_PORT) - -#define VF_UART_VIRTUAL_BASE 0xfe000000 - - .macro addruart, rp, rv, tmp - ldr \rp, =VF_UART_PHYSICAL_BASE @ physical - and \rv, \rp, #0xffffff @ offset within 16MB section - add \rv, \rv, #VF_UART_VIRTUAL_BASE - .endm - - .macro senduart, rd, rx - strb \rd, [\rx, #0x7] @ Data Register - .endm - - .macro busyuart, rd, rx -1001: ldrb \rd, [\rx, #0x4] @ Status Register 1 - tst \rd, #1 << 6 @ TC - beq 1001b @ wait until transmit done - .endm - - .macro waituart,rd,rx - .endm diff --git a/arch/arm/include/debug/vt8500.S b/arch/arm/include/debug/vt8500.S deleted file mode 100644 index 8dc1df2d91b859926978cb9313b30fa4235764c9..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/vt8500.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2010 Alexey Charkov - * Moved from arch/arm/mach-vt8500/include/mach/debug-macro.S - * Minor changes for readability. - */ - -#define DEBUG_LL_PHYS_BASE 0xD8000000 -#define DEBUG_LL_VIRT_BASE 0xF8000000 -#define DEBUG_LL_UART_OFFSET 0x00200000 - -#if defined(CONFIG_DEBUG_VT8500_UART0) - .macro addruart, rp, rv, tmp - mov \rp, #DEBUG_LL_UART_OFFSET - orr \rv, \rp, #DEBUG_LL_VIRT_BASE - orr \rp, \rp, #DEBUG_LL_PHYS_BASE - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #0] - .endm - - .macro busyuart,rd,rx -1001: ldr \rd, [\rx, #0x1c] - ands \rd, \rd, #0x2 - bne 1001b - .endm - - .macro waituart,rd,rx - .endm - -#endif diff --git a/arch/arm/include/debug/zynq.S b/arch/arm/include/debug/zynq.S deleted file mode 100644 index 58d77c972fd680684a2e0f1f8ac40bd19e0d221d..0000000000000000000000000000000000000000 --- a/arch/arm/include/debug/zynq.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Debugging macro include header - * - * Copyright (C) 2011 Xilinx - */ -#define UART_CR_OFFSET 0x00 /* Control Register [8:0] */ -#define UART_SR_OFFSET 0x2C /* Channel Status [11:0] */ -#define UART_FIFO_OFFSET 0x30 /* FIFO [15:0] or [7:0] */ - -#define UART_SR_TXFULL 0x00000010 /* TX FIFO full */ -#define UART_SR_TXEMPTY 0x00000008 /* TX FIFO empty */ - -#define UART0_PHYS 0xE0000000 -#define UART0_VIRT 0xF0800000 -#define UART1_PHYS 0xE0001000 -#define UART1_VIRT 0xF0801000 - -#if IS_ENABLED(CONFIG_DEBUG_ZYNQ_UART1) -# define LL_UART_PADDR UART1_PHYS -# define LL_UART_VADDR UART1_VIRT -#else -# define LL_UART_PADDR UART0_PHYS -# define LL_UART_VADDR UART0_VIRT -#endif - - .macro addruart, rp, rv, tmp - ldr \rp, =LL_UART_PADDR @ physical - ldr \rv, =LL_UART_VADDR @ virtual - .endm - - .macro senduart,rd,rx - strb \rd, [\rx, #UART_FIFO_OFFSET] @ TXDATA - .endm - - .macro waituart,rd,rx -1001: ldr \rd, [\rx, #UART_SR_OFFSET] -ARM_BE8( rev \rd, \rd ) - tst \rd, #UART_SR_TXEMPTY - beq 1001b - .endm - - .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #UART_SR_OFFSET] @ get status register -ARM_BE8( rev \rd, \rd ) - tst \rd, #UART_SR_TXFULL @ - bne 1002b @ wait if FIFO is full - .endm diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S deleted file mode 100644 index e112072b579d424c3dfcaad914cc8c576b9590c8..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/debug.S +++ /dev/null @@ -1,154 +0,0 @@ -/* 
SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/debug.S - * - * Copyright (C) 1994-1999 Russell King - * - * 32-bit debugging code - */ -#include <linux/linkage.h> -#include <asm/assembler.h> - - .text - -/* - * Some debugging routines (useful if you've got MM problems and - * printk isn't working). For DEBUGGING ONLY!!! Do not leave - * references to these in a production kernel! - */ - -#if !defined(CONFIG_DEBUG_SEMIHOSTING) -#include CONFIG_DEBUG_LL_INCLUDE -#endif - -#ifdef CONFIG_MMU - .macro addruart_current, rx, tmp1, tmp2 - addruart \tmp1, \tmp2, \rx - mrc p15, 0, \rx, c1, c0 - tst \rx, #1 - moveq \rx, \tmp1 - movne \rx, \tmp2 - .endm - -#else /* !CONFIG_MMU */ - .macro addruart_current, rx, tmp1, tmp2 - addruart \rx, \tmp1, \tmp2 - .endm - -#endif /* CONFIG_MMU */ - -/* - * Useful debugging routines - */ -ENTRY(printhex8) - mov r1, #8 - b printhex -ENDPROC(printhex8) - -ENTRY(printhex4) - mov r1, #4 - b printhex -ENDPROC(printhex4) - -ENTRY(printhex2) - mov r1, #2 -printhex: adr r2, hexbuf_rel - ldr r3, [r2] - add r2, r2, r3 - add r3, r2, r1 - mov r1, #0 - strb r1, [r3] -1: and r1, r0, #15 - mov r0, r0, lsr #4 - cmp r1, #10 - addlt r1, r1, #'0' - addge r1, r1, #'a' - 10 - strb r1, [r3, #-1]! - teq r3, r2 - bne 1b - mov r0, r2 - b printascii -ENDPROC(printhex2) - - .pushsection .bss -hexbuf_addr: .space 16 - .popsection - .align -hexbuf_rel: .long hexbuf_addr - . - - .ltorg - -#ifndef CONFIG_DEBUG_SEMIHOSTING - -ENTRY(printascii) - addruart_current r3, r1, r2 -1: teq r0, #0 - ldrbne r1, [r0], #1 - teqne r1, #0 - reteq lr -2: teq r1, #'\n' - bne 3f - mov r1, #'\r' - waituart r2, r3 - senduart r1, r3 - busyuart r2, r3 - mov r1, #'\n' -3: waituart r2, r3 - senduart r1, r3 - busyuart r2, r3 - b 1b -ENDPROC(printascii) - -ENTRY(printch) - addruart_current r3, r1, r2 - mov r1, r0 - mov r0, #0 - b 2b -ENDPROC(printch) - -#ifdef CONFIG_MMU -ENTRY(debug_ll_addr) - addruart r2, r3, ip - str r2, [r0] - str r3, [r1] - ret lr -ENDPROC(debug_ll_addr) -#endif - -#else - -ENTRY(printascii) - mov r1, r0 - mov r0, #0x04 @ SYS_WRITE0 - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - ret lr -ENDPROC(printascii) - -ENTRY(printch) - adr r1, hexbuf_rel - ldr r2, [r1] - add r1, r1, r2 - strb r0, [r1] - mov r0, #0x03 @ SYS_WRITEC - ARM( svc #0x123456 ) -#ifdef CONFIG_CPU_V7M - THUMB( bkpt #0xab ) -#else - THUMB( svc #0xab ) -#endif - ret lr -ENDPROC(printch) - -ENTRY(debug_ll_addr) - mov r2, #0 - str r2, [r0] - str r2, [r1] - ret lr -ENDPROC(debug_ll_addr) - -#endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S deleted file mode 100644 index b62d74a2c73a58f9fdf9d6bca7e7d473c65a5d28..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-armv.S +++ /dev/null @@ -1,1197 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-armv.S - * - * Copyright (C) 1996,1997,1998 Russell King. - * ARM700 fix by Matthew Godbolt (linux-user@willothewisp.demon.co.uk) - * nommu support by Hyok S. Choi (hyok.choi@samsung.com) - * - * Low-level vector interface routines - * - * Note: there is a StrongARM bug in the STMIA rn, {regs}^ instruction - * that causes it to save wrong values... Be aware! - */ - -#include <linux/init.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/glue-df.h> -#include <asm/glue-pf.h> -#include <asm/vfpmacros.h> -#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER -#include <mach/entry-macro.S> -#endif -#include <asm/thread_notify.h> -#include <asm/unwind.h> -#include <asm/unistd.h> -#include <asm/tls.h> -#include <asm/system_info.h> -#include <asm/uaccess-asm.h> - -#include "entry-header.S" -#include <asm/entry-macro-multi.S> -#include <asm/probes.h> - -/* - * Interrupt handling.
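The printhex2/4/8 entry points in the debug.S hunk above share one loop: mask off the low nibble, turn it into an ASCII digit, store it at the back of the buffer, and shift right by four. The same conversion in C; print_hex(0x2a, 2, buf) leaves "2a" in buf:

static void print_hex(unsigned int val, int digits, char *buf)
{
        buf[digits] = '\0';
        while (digits--) {
                unsigned int nib = val & 15;
                buf[digits] = (char)(nib < 10 ? '0' + nib : 'a' + nib - 10);
                val >>= 4;
        }
}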
- */ - .macro irq_handler -#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER - ldr r1, =handle_arch_irq - mov r0, sp - badr lr, 9997f - ldr pc, [r1] -#else - arch_irq_handler_default -#endif -9997: - .endm - - .macro pabt_helper - @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 -#ifdef MULTI_PABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_PABT_FUNC] -#else - bl CPU_PABORT_HANDLER -#endif - .endm - - .macro dabt_helper - - @ - @ Call the processor-specific abort handler: - @ - @ r2 - pt_regs - @ r4 - aborted context pc - @ r5 - aborted context psr - @ - @ The abort handler must return the aborted address in r0, and - @ the fault status register in r1. r9 must be preserved. - @ -#ifdef MULTI_DABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_DABT_FUNC] -#else - bl CPU_DABORT_HANDLER -#endif - .endm - - .section .entry.text,"ax",%progbits - -/* - * Invalid mode handlers - */ - .macro inv_entry, reason - sub sp, sp, #PT_REGS_SIZE - ARM( stmib sp, {r1 - lr} ) - THUMB( stmia sp, {r0 - r12} ) - THUMB( str sp, [sp, #S_SP] ) - THUMB( str lr, [sp, #S_LR] ) - mov r1, #\reason - .endm - -__pabt_invalid: - inv_entry BAD_PREFETCH - b common_invalid -ENDPROC(__pabt_invalid) - -__dabt_invalid: - inv_entry BAD_DATA - b common_invalid -ENDPROC(__dabt_invalid) - -__irq_invalid: - inv_entry BAD_IRQ - b common_invalid -ENDPROC(__irq_invalid) - -__und_invalid: - inv_entry BAD_UNDEFINSTR - - @ - @ XXX fall through to common_invalid - @ - -@ -@ common_invalid - generic code for failed exception (re-entrant version of handlers) -@ -common_invalid: - zero_fp - - ldmia r0, {r4 - r6} - add r0, sp, #S_PC @ here for interlock avoidance - mov r7, #-1 @ "" "" "" "" - str r4, [sp] @ save preserved r0 - stmia r0, {r5 - r7} @ lr_, - @ cpsr_, "old_r0" - - mov r0, sp - b bad_mode -ENDPROC(__und_invalid) - -/* - * SVC mode handlers - */ - -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) -#define SPFIX(code...) code -#else -#define SPFIX(code...) -#endif - - .macro svc_entry, stack_hole=0, trace=1, uaccess=1 - UNWIND(.fnstart ) - UNWIND(.save {r0 - pc} ) - sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4) -#ifdef CONFIG_THUMB2_KERNEL - SPFIX( str r0, [sp] ) @ temporarily saved - SPFIX( mov r0, sp ) - SPFIX( tst r0, #4 ) @ test original stack alignment - SPFIX( ldr r0, [sp] ) @ restored -#else - SPFIX( tst sp, #4 ) -#endif - SPFIX( subeq sp, sp, #4 ) - stmia sp, {r1 - r12} - - ldmia r0, {r3 - r5} - add r7, sp, #S_SP - 4 @ here for interlock avoidance - mov r6, #-1 @ "" "" "" "" - add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4) - SPFIX( addeq r2, r2, #4 ) - str r3, [sp, #-4]! 
@ save the "real" r0 copied - @ from the exception stack - - mov r3, lr - - @ - @ We are now ready to fill in the remaining blanks on the stack: - @ - @ r2 - sp_svc - @ r3 - lr_svc - @ r4 - lr_, already fixed up for correct return/restart - @ r5 - spsr_ - @ r6 - orig_r0 (see pt_regs definition in ptrace.h) - @ - stmia r7, {r2 - r6} - - get_thread_info tsk - uaccess_entry tsk, r0, r1, r2, \uaccess - - .if \trace -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - .endif - .endm - - .align 5 -__dabt_svc: - svc_entry uaccess=0 - mov r2, sp - dabt_helper - THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__dabt_svc) - - .align 5 -__irq_svc: - svc_entry - irq_handler - -#ifdef CONFIG_PREEMPT - ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags - teq r8, #0 @ if preempt count != 0 - movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED - blne svc_preempt -#endif - - svc_exit r5, irq = 1 @ return from exception - UNWIND(.fnend ) -ENDPROC(__irq_svc) - - .ltorg - -#ifdef CONFIG_PREEMPT -svc_preempt: - mov r8, lr -1: bl preempt_schedule_irq @ irq en/disable is done inside - ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED - reteq r8 @ go again - b 1b -#endif - -__und_fault: - @ Correct the PC such that it is pointing at the instruction - @ which caused the fault. If the faulting instruction was ARM - @ the PC will be pointing at the next instruction, and have to - @ subtract 4. Otherwise, it is Thumb, and the PC will be - @ pointing at the second half of the Thumb instruction. We - @ have to subtract 2. - ldr r2, [r0, #S_PC] - sub r2, r2, r1 - str r2, [r0, #S_PC] - b do_undefinstr -ENDPROC(__und_fault) - - .align 5 -__und_svc: -#ifdef CONFIG_KPROBES - @ If a kprobe is about to simulate a "stmdb sp..." instruction, - @ it obviously needs free stack space which then will belong to - @ the saved context. - svc_entry MAX_STACK_SIZE -#else - svc_entry -#endif - - mov r1, #4 @ PC correction to apply - THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? - THUMB( movne r1, #2 ) @ if so, fix up PC correction - mov r0, sp @ struct pt_regs *regs - bl __und_fault - -__und_svc_finish: - get_thread_info tsk - ldr r5, [sp, #S_PSR] @ Get SVC cpsr - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__und_svc) - - .align 5 -__pabt_svc: - svc_entry - mov r2, sp @ regs - pabt_helper - svc_exit r5 @ return from exception - UNWIND(.fnend ) -ENDPROC(__pabt_svc) - - .align 5 -__fiq_svc: - svc_entry trace=0 - mov r0, sp @ struct pt_regs *regs - bl handle_fiq_as_nmi - svc_exit_via_fiq - UNWIND(.fnend ) -ENDPROC(__fiq_svc) - - .align 5 -.LCcralign: - .word cr_alignment -#ifdef MULTI_DABORT -.LCprocfns: - .word processor -#endif -.LCfp: - .word fp_enter - -/* - * Abort mode handlers - */ - -@ -@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode -@ and reuses the same macros. However in abort mode we must also -@ save/restore lr_abt and spsr_abt to make nested aborts safe. 
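The __und_fault fix-up above is easy to restate in C: the saved PC points past the instruction that trapped, by 4 bytes in ARM state or 2 in Thumb state, so the handler backs it up before reporting the fault. A sketch:

#include <stdint.h>

#define PSR_T_BIT (1u << 5)   /* Thumb state bit in the CPSR */

static uint32_t undef_insn_pc(uint32_t saved_pc, uint32_t cpsr)
{
        return saved_pc - ((cpsr & PSR_T_BIT) ? 2u : 4u);
}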
-@ - .align 5 -__fiq_abt: - svc_entry trace=0 - - ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - mov r1, lr @ Save lr_abt - mrs r2, spsr @ Save spsr_abt, abort is now safe - ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - stmfd sp!, {r1 - r2} - - add r0, sp, #8 @ struct pt_regs *regs - bl handle_fiq_as_nmi - - ldmfd sp!, {r1 - r2} - ARM( msr cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - mov lr, r1 @ Restore lr_abt, abort is unsafe - msr spsr_cxsf, r2 @ Restore spsr_abt - ARM( msr cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( mov r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT ) - THUMB( msr cpsr_c, r0 ) - - svc_exit_via_fiq - UNWIND(.fnend ) -ENDPROC(__fiq_abt) - -/* - * User mode handlers - * - * EABI note: sp_svc is always 64-bit aligned here, so should PT_REGS_SIZE - */ - -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) && (PT_REGS_SIZE & 7) -#error "sizeof(struct pt_regs) must be a multiple of 8" -#endif - - .macro usr_entry, trace=1, uaccess=1 - UNWIND(.fnstart ) - UNWIND(.cantunwind ) @ don't unwind the user space - sub sp, sp, #PT_REGS_SIZE - ARM( stmib sp, {r1 - r12} ) - THUMB( stmia sp, {r0 - r12} ) - - ATRAP( mrc p15, 0, r7, c1, c0, 0) - ATRAP( ldr r8, .LCcralign) - - ldmia r0, {r3 - r5} - add r0, sp, #S_PC @ here for interlock avoidance - mov r6, #-1 @ "" "" "" "" - - str r3, [sp] @ save the "real" r0 copied - @ from the exception stack - - ATRAP( ldr r8, [r8, #0]) - - @ - @ We are now ready to fill in the remaining blanks on the stack: - @ - @ r4 - lr_, already fixed up for correct return/restart - @ r5 - spsr_ - @ r6 - orig_r0 (see pt_regs definition in ptrace.h) - @ - @ Also, separately save sp_usr and lr_usr - @ - stmia r0, {r4 - r6} - ARM( stmdb r0, {sp, lr}^ ) - THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) - - .if \uaccess - uaccess_disable ip - .endif - - @ Enable the alignment trap while in kernel mode - ATRAP( teq r8, r7) - ATRAP( mcrne p15, 0, r8, c1, c0, 0) - - @ - @ Clear FP to mark the first stack frame - @ - zero_fp - - .if \trace -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - ct_user_exit save = 0 - .endif - .endm - - .macro kuser_cmpxchg_check -#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) -#ifndef CONFIG_MMU -#warning "NPTL on non MMU needs fixing" -#else - @ Make sure our user space atomic helper is restarted - @ if it was interrupted in a critical region. Here we - @ perform a quick test inline since it should be false - @ 99.9999% of the time. The rest is done out of line. - cmp r4, #TASK_SIZE - blhs kuser_cmpxchg64_fixup -#endif -#endif - .endm - - .align 5 -__dabt_usr: - usr_entry uaccess=0 - kuser_cmpxchg_check - mov r2, sp - dabt_helper - b ret_from_exception - UNWIND(.fnend ) -ENDPROC(__dabt_usr) - - .align 5 -__irq_usr: - usr_entry - kuser_cmpxchg_check - irq_handler - get_thread_info tsk - mov why, #0 - b ret_to_user_from_irq - UNWIND(.fnend ) -ENDPROC(__irq_usr) - - .ltorg - - .align 5 -__und_usr: - usr_entry uaccess=0 - - mov r2, r4 - mov r3, r5 - - @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the - @ faulting instruction depending on Thumb mode. 
- @ r3 = regs->ARM_cpsr - @ - @ The emulation code returns using r9 if it has emulated the - @ instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - badr r9, ret_from_exception - - @ IRQs must be enabled before attempting to read the instruction from - @ user space since that could cause a page/translation fault if the - @ page table was modified by another CPU. - enable_irq - - tst r3, #PSR_T_BIT @ Thumb mode? - bne __und_usr_thumb - sub r4, r2, #4 @ ARM instr at LR - 4 -1: ldrt r0, [r4] - ARM_BE8(rev r0, r0) @ little endian instruction - - uaccess_disable ip - - @ r0 = 32-bit ARM instruction which caused the exception - @ r2 = PC value for the following instruction (:= regs->ARM_pc) - @ r4 = PC value for the faulting instruction - @ lr = 32-bit undefined instruction function - badr lr, __und_usr_fault_32 - b call_fpe - -__und_usr_thumb: - @ Thumb instruction - sub r4, r2, #2 @ First half of thumb instr at LR - 2 -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 -/* - * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms - * can never be supported in a single kernel, this code is not applicable at - * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be - * made about .arch directives. - */ -#if __LINUX_ARM_ARCH__ < 7 -/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ -#define NEED_CPU_ARCHITECTURE - ldr r5, .LCcpu_architecture - ldr r5, [r5] - cmp r5, #CPU_ARCH_ARMv7 - blo __und_usr_fault_16 @ 16bit undefined instruction -/* - * The following code won't get run unless the running CPU really is v7, so - * coding round the lack of ldrht on older arches is pointless. Temporarily - * override the assembler target arch with the minimum required instead: - */ - .arch armv6t2 -#endif -2: ldrht r5, [r4] -ARM_BE8(rev16 r5, r5) @ little endian instruction - cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16_pan @ 16bit undefined instruction -3: ldrht r0, [r2] -ARM_BE8(rev16 r0, r0) @ little endian instruction - uaccess_disable ip - add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 - str r2, [sp, #S_PC] @ it's a 2x16bit instr, update - orr r0, r0, r5, lsl #16 - badr lr, __und_usr_fault_32 - @ r0 = the two 16-bit Thumb instructions which caused the exception - @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) - @ r4 = PC value for the first 16-bit Thumb instruction - @ lr = 32bit undefined instruction function - -#if __LINUX_ARM_ARCH__ < 7 -/* If the target arch was overridden, change it back: */ -#ifdef CONFIG_CPU_32v6K - .arch armv6k -#else - .arch armv6 -#endif -#endif /* __LINUX_ARM_ARCH__ < 7 */ -#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ - b __und_usr_fault_16 -#endif - UNWIND(.fnend) -ENDPROC(__und_usr) - -/* - * The out of line fixup for the ldrt instructions above. - */ - .pushsection .text.fixup, "ax" - .align 2 -4: str r4, [sp, #S_PC] @ retry current instruction - ret r9 - .popsection - .pushsection __ex_table,"a" - .long 1b, 4b -#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 - .long 2b, 4b - .long 3b, 4b -#endif - .popsection - -/* - * Check whether the instruction is a co-processor instruction. - * If yes, we need to call the relevant co-processor handler. - * - * Note that we don't do a full check here for the co-processor - * instructions; all instructions with bit 27 set are well - * defined. 
The only instructions that should fault are the - * co-processor instructions. However, we have to watch out - * for the ARM6/ARM7 SWI bug. - * - * NEON is a special case that has to be handled here. Not all - * NEON instructions are co-processor instructions, so we have - * to make a special case of checking for them. Plus, there's - * five groups of them, so we have a table of mask/opcode pairs - * to check against, and if any match then we branch off into the - * NEON handler code. - * - * Emulators may wish to make use of the following registers: - * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) - * r2 = PC value to resume execution after successful emulation - * r9 = normal "successful" return address - * r10 = this threads thread_info structure - * lr = unrecognised instruction return address - * IRQs enabled, FIQs enabled. - */ - @ - @ Fall-through from Thumb-2 __und_usr - @ -#ifdef CONFIG_NEON - get_thread_info r10 @ get current thread - adr r6, .LCneon_thumb_opcodes - b 2f -#endif -call_fpe: - get_thread_info r10 @ get current thread -#ifdef CONFIG_NEON - adr r6, .LCneon_arm_opcodes -2: ldr r5, [r6], #4 @ mask value - ldr r7, [r6], #4 @ opcode bits matching in mask - cmp r5, #0 @ end mask? - beq 1f - and r8, r0, r5 - cmp r8, r7 @ NEON instruction? - bne 2b - mov r7, #1 - strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used - strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used - b do_vfp @ let VFP handler handle this -1: -#endif - tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 - tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 - reteq lr - and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) - mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] -#ifdef CONFIG_IWMMXT - @ Test if we need to give access to iWMMXt coprocessors - ldr r5, [r10, #TI_FLAGS] - rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only - movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1) - bcs iwmmxt_task_enable -#endif - ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) - THUMB( add pc, r8 ) - nop - - ret.w lr @ CP#0 - W(b) do_fpe @ CP#1 (FPE) - W(b) do_fpe @ CP#2 (FPE) - ret.w lr @ CP#3 -#ifdef CONFIG_CRUNCH - b crunch_task_enable @ CP#4 (MaverickCrunch) - b crunch_task_enable @ CP#5 (MaverickCrunch) - b crunch_task_enable @ CP#6 (MaverickCrunch) -#else - ret.w lr @ CP#4 - ret.w lr @ CP#5 - ret.w lr @ CP#6 -#endif - ret.w lr @ CP#7 - ret.w lr @ CP#8 - ret.w lr @ CP#9 -#ifdef CONFIG_VFP - W(b) do_vfp @ CP#10 (VFP) - W(b) do_vfp @ CP#11 (VFP) -#else - ret.w lr @ CP#10 (VFP) - ret.w lr @ CP#11 (VFP) -#endif - ret.w lr @ CP#12 - ret.w lr @ CP#13 - ret.w lr @ CP#14 (Debug) - ret.w lr @ CP#15 (Control) - -#ifdef NEED_CPU_ARCHITECTURE - .align 2 -.LCcpu_architecture: - .word __cpu_architecture -#endif - -#ifdef CONFIG_NEON - .align 6 - -.LCneon_arm_opcodes: - .word 0xfe000000 @ mask - .word 0xf2000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf4000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode - -.LCneon_thumb_opcodes: - .word 0xef000000 @ mask - .word 0xef000000 @ opcode - - .word 0xff100000 @ mask - .word 0xf9000000 @ opcode - - .word 0x00000000 @ mask - .word 0x00000000 @ opcode -#endif - -do_fpe: - ldr r4, .LCfp - add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr pc, [r4] @ Call FP module USR entry point - -/* - * The FP module is called with these registers set: - * r0 = instruction - * r2 = PC+4 - * r9 = normal "successful" return address - * r10 
= FP workspace - * lr = unrecognised FP instruction return address - */ - - .pushsection .data - .align 2 -ENTRY(fp_enter) - .word no_fp - .popsection - -ENTRY(no_fp) - ret lr -ENDPROC(no_fp) - -__und_usr_fault_32: - mov r1, #4 - b 1f -__und_usr_fault_16_pan: - uaccess_disable ip -__und_usr_fault_16: - mov r1, #2 -1: mov r0, sp - badr lr, ret_from_exception - b __und_fault -ENDPROC(__und_usr_fault_32) -ENDPROC(__und_usr_fault_16) - - .align 5 -__pabt_usr: - usr_entry - mov r2, sp @ regs - pabt_helper - UNWIND(.fnend ) - /* fall through */ -/* - * This is the return code to user mode for abort handlers - */ -ENTRY(ret_from_exception) - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - get_thread_info tsk - mov why, #0 - b ret_to_user - UNWIND(.fnend ) -ENDPROC(__pabt_usr) -ENDPROC(ret_from_exception) - - .align 5 -__fiq_usr: - usr_entry trace=0 - kuser_cmpxchg_check - mov r0, sp @ struct pt_regs *regs - bl handle_fiq_as_nmi - get_thread_info tsk - restore_user_regs fast = 0, offset = 0 - UNWIND(.fnend ) -ENDPROC(__fiq_usr) - -/* - * Register switch for ARMv3 and ARMv4 processors - * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info - * previous and next are guaranteed not to be the same. - */ -ENTRY(__switch_to) - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - add ip, r1, #TI_CPU_SAVE - ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack - THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack - THUMB( str sp, [ip], #4 ) - THUMB( str lr, [ip], #4 ) - ldr r4, [r2, #TI_TP_VALUE] - ldr r5, [r2, #TI_TP_VALUE + 4] -#ifdef CONFIG_CPU_USE_DOMAINS - mrc p15, 0, r6, c3, c0, 0 @ Get domain register - str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register - ldr r6, [r2, #TI_CPU_DOMAIN] -#endif - switch_tls r1, r4, r5, r3, r7 -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - ldr r7, [r2, #TI_TASK] - ldr r8, =__stack_chk_guard - .if (TSK_STACK_CANARY > IMM12_MASK) - add r7, r7, #TSK_STACK_CANARY & ~IMM12_MASK - .endif - ldr r7, [r7, #TSK_STACK_CANARY & IMM12_MASK] -#endif -#ifdef CONFIG_CPU_USE_DOMAINS - mcr p15, 0, r6, c3, c0, 0 @ Set domain register -#endif - mov r5, r0 - add r4, r2, #TI_CPU_SAVE - ldr r0, =thread_notify_head - mov r1, #THREAD_NOTIFY_SWITCH - bl atomic_notifier_call_chain -#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) - str r7, [r8] -#endif - THUMB( mov ip, r4 ) - mov r0, r5 - ARM( ldmia r4, {r4 - sl, fp, sp, pc} ) @ Load all regs saved previously - THUMB( ldmia ip!, {r4 - sl, fp} ) @ Load all regs saved previously - THUMB( ldr sp, [ip], #4 ) - THUMB( ldr pc, [ip] ) - UNWIND(.fnend ) -ENDPROC(__switch_to) - - __INIT - -/* - * User helpers. - * - * Each segment is 32-byte aligned and will be moved to the top of the high - * vector page. New segments (if ever needed) must be added in front of - * existing ones. This mechanism should be used only for things that are - * really small and justified, and not be abused freely. - * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. - */ - THUMB( .arm ) - - .macro usr_ret, reg -#ifdef CONFIG_ARM_THUMB - bx \reg -#else - ret \reg -#endif - .endm - - .macro kuser_pad, sym, size - .if (. - \sym) & 3 - .rept 4 - (. - \sym) & 3 - .byte 0 - .endr - .endif - .rept (\size - (. 
- \sym)) / 4 - .word 0xe7fddef1 - .endr - .endm - -#ifdef CONFIG_KUSER_HELPERS - .align 5 - .globl __kuser_helper_start -__kuser_helper_start: - -/* - * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular - * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point. - */ - -__kuser_cmpxchg64: @ 0xffff0f60 - -#if defined(CONFIG_CPU_32v6K) - - stmfd sp!, {r4, r5, r6, r7} - ldrd r4, r5, [r0] @ load old val - ldrd r6, r7, [r1] @ load new val - smp_dmb arm -1: ldrexd r0, r1, [r2] @ load current val - eors r3, r0, r4 @ compare with oldval (1) - eorseq r3, r1, r5 @ compare with oldval (2) - strexdeq r3, r6, r7, [r2] @ store newval if eq - teqeq r3, #1 @ success? - beq 1b @ if no then retry - smp_dmb arm - rsbs r0, r3, #0 @ set returned val and C flag - ldmfd sp!, {r4, r5, r6, r7} - usr_ret lr - -#elif !defined(CONFIG_SMP) - -#ifdef CONFIG_MMU - - /* - * The only thing that can break atomicity in this cmpxchg64 - * implementation is either an IRQ or a data abort exception - * causing another process/thread to be scheduled in the middle of - * the critical sequence. The same strategy as for cmpxchg is used. - */ - stmfd sp!, {r4, r5, r6, lr} - ldmia r0, {r4, r5} @ load old val - ldmia r1, {r6, lr} @ load new val -1: ldmia r2, {r0, r1} @ load current val - eors r3, r0, r4 @ compare with oldval (1) - eorseq r3, r1, r5 @ compare with oldval (2) -2: stmiaeq r2, {r6, lr} @ store newval if eq - rsbs r0, r3, #0 @ set return val and C flag - ldmfd sp!, {r4, r5, r6, pc} - - .text -kuser_cmpxchg64_fixup: - @ Called from kuser_cmpxchg_fixup. - @ r4 = address of interrupted insn (must be preserved). - @ sp = saved regs. r7 and r8 are clobbered. - @ 1b = first critical insn, 2b = last critical insn. - @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. - mov r7, #0xffff0fff - sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64))) - subs r8, r4, r7 - rsbscs r8, r8, #(2b - 1b) - strcs r7, [sp, #S_PC] -#if __LINUX_ARM_ARCH__ < 6 - bcc kuser_cmpxchg32_fixup -#endif - ret lr - .previous - -#else -#warning "NPTL on non MMU needs fixing" - mov r0, #-1 - adds r0, r0, #0 - usr_ret lr -#endif - -#else -#error "incoherent kernel configuration" -#endif - - kuser_pad __kuser_cmpxchg64, 64 - -__kuser_memory_barrier: @ 0xffff0fa0 - smp_dmb arm - usr_ret lr - - kuser_pad __kuser_memory_barrier, 32 - -__kuser_cmpxchg: @ 0xffff0fc0 - -#if __LINUX_ARM_ARCH__ < 6 - -#ifdef CONFIG_MMU - - /* - * The only thing that can break atomicity in this cmpxchg - * implementation is either an IRQ or a data abort exception - * causing another process/thread to be scheduled in the middle - * of the critical sequence. To prevent this, code is added to - * the IRQ and data abort exception handlers to set the pc back - * to the beginning of the critical section if it is found to be - * within that critical section (see kuser_cmpxchg_fixup). - */ -1: ldr r3, [r2] @ load current val - subs r3, r3, r0 @ compare with oldval -2: streq r1, [r2] @ store newval if eq - rsbs r0, r3, #0 @ set return val and C flag - usr_ret lr - - .text -kuser_cmpxchg32_fixup: - @ Called from kuser_cmpxchg_check macro. - @ r4 = address of interrupted insn (must be preserved). - @ sp = saved regs. r7 and r8 are clobbered. - @ 1b = first critical insn, 2b = last critical insn. - @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. 
- mov r7, #0xffff0fff - sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg))) - subs r8, r4, r7 - rsbscs r8, r8, #(2b - 1b) - strcs r7, [sp, #S_PC] - ret lr - .previous - -#else -#warning "NPTL on non MMU needs fixing" - mov r0, #-1 - adds r0, r0, #0 - usr_ret lr -#endif - -#else - - smp_dmb arm -1: ldrex r3, [r2] - subs r3, r3, r0 - strexeq r3, r1, [r2] - teqeq r3, #1 - beq 1b - rsbs r0, r3, #0 - /* beware -- each __kuser slot must be 8 instructions max */ - ALT_SMP(b __kuser_memory_barrier) - ALT_UP(usr_ret lr) - -#endif - - kuser_pad __kuser_cmpxchg, 32 - -__kuser_get_tls: @ 0xffff0fe0 - ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init - usr_ret lr - mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code - kuser_pad __kuser_get_tls, 16 - .rep 3 - .word 0 @ 0xffff0ff0 software TLS value, then - .endr @ pad up to __kuser_helper_version - -__kuser_helper_version: @ 0xffff0ffc - .word ((__kuser_helper_end - __kuser_helper_start) >> 5) - - .globl __kuser_helper_end -__kuser_helper_end: - -#endif - - THUMB( .thumb ) - -/* - * Vector stubs. - * - * This code is copied to 0xffff1000 so we can use branches in the - * vectors, rather than ldr's. Note that this code must not exceed - * a page size. - * - * Common stub entry macro: - * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - * - * SP points to a minimal amount of processor-private memory, the address - * of which is copied into r0 for the mode specific abort handler. - */ - .macro vector_stub, name, mode, correction=0 - .align 5 - -vector_\name: - .if \correction - sub lr, lr, #\correction - .endif - - @ - @ Save r0, lr_ (parent PC) and spsr_ - @ (parent CPSR) - @ - stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr - str lr, [sp, #8] @ save spsr - - @ - @ Prepare for SVC32 mode. IRQs remain disabled. 
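The __kuser_cmpxchg helpers above all implement the same user-visible semantics: atomically replace *ptr with newval only if it still contains oldval, retrying when a racing store steals the exclusive reservation. Functionally (ignoring the flag-based return convention) that is a compare-and-swap, sketched here with C11 atomics rather than raw LDREX/STREX:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool kuser_style_cmpxchg(_Atomic uint32_t *ptr,
                                uint32_t oldval, uint32_t newval)
{
        /* strong CAS: fails only if *ptr no longer equals oldval */
        return atomic_compare_exchange_strong(ptr, &oldval, newval);
}

The real helper additionally reports success in the carry flag and, on pre-v6 UP kernels, relies on the exception handlers rewinding the PC into the critical section rather than on exclusive monitors.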
- @ - mrs r0, cpsr - eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE) - msr spsr_cxsf, r0 - - @ - @ the branch table must immediately follow this code - @ - and lr, lr, #0x0f - THUMB( adr r0, 1f ) - THUMB( ldr lr, [r0, lr, lsl #2] ) - mov r0, sp - ARM( ldr lr, [pc, lr, lsl #2] ) - movs pc, lr @ branch to handler in SVC mode -ENDPROC(vector_\name) - - .align 2 - @ handler addresses follow this label -1: - .endm - - .section .stubs, "ax", %progbits - @ This must be the first word - .word vector_swi - -vector_rst: - ARM( swi SYS_ERROR0 ) - THUMB( svc #0 ) - THUMB( nop ) - b vector_und - -/* - * Interrupt dispatcher - */ - vector_stub irq, IRQ_MODE, 4 - - .long __irq_usr @ 0 (USR_26 / USR_32) - .long __irq_invalid @ 1 (FIQ_26 / FIQ_32) - .long __irq_invalid @ 2 (IRQ_26 / IRQ_32) - .long __irq_svc @ 3 (SVC_26 / SVC_32) - .long __irq_invalid @ 4 - .long __irq_invalid @ 5 - .long __irq_invalid @ 6 - .long __irq_invalid @ 7 - .long __irq_invalid @ 8 - .long __irq_invalid @ 9 - .long __irq_invalid @ a - .long __irq_invalid @ b - .long __irq_invalid @ c - .long __irq_invalid @ d - .long __irq_invalid @ e - .long __irq_invalid @ f - -/* - * Data abort dispatcher - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ - vector_stub dabt, ABT_MODE, 8 - - .long __dabt_usr @ 0 (USR_26 / USR_32) - .long __dabt_invalid @ 1 (FIQ_26 / FIQ_32) - .long __dabt_invalid @ 2 (IRQ_26 / IRQ_32) - .long __dabt_svc @ 3 (SVC_26 / SVC_32) - .long __dabt_invalid @ 4 - .long __dabt_invalid @ 5 - .long __dabt_invalid @ 6 - .long __dabt_invalid @ 7 - .long __dabt_invalid @ 8 - .long __dabt_invalid @ 9 - .long __dabt_invalid @ a - .long __dabt_invalid @ b - .long __dabt_invalid @ c - .long __dabt_invalid @ d - .long __dabt_invalid @ e - .long __dabt_invalid @ f - -/* - * Prefetch abort dispatcher - * Enter in ABT mode, spsr = USR CPSR, lr = USR PC - */ - vector_stub pabt, ABT_MODE, 4 - - .long __pabt_usr @ 0 (USR_26 / USR_32) - .long __pabt_invalid @ 1 (FIQ_26 / FIQ_32) - .long __pabt_invalid @ 2 (IRQ_26 / IRQ_32) - .long __pabt_svc @ 3 (SVC_26 / SVC_32) - .long __pabt_invalid @ 4 - .long __pabt_invalid @ 5 - .long __pabt_invalid @ 6 - .long __pabt_invalid @ 7 - .long __pabt_invalid @ 8 - .long __pabt_invalid @ 9 - .long __pabt_invalid @ a - .long __pabt_invalid @ b - .long __pabt_invalid @ c - .long __pabt_invalid @ d - .long __pabt_invalid @ e - .long __pabt_invalid @ f - -/* - * Undef instr entry dispatcher - * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC - */ - vector_stub und, UND_MODE - - .long __und_usr @ 0 (USR_26 / USR_32) - .long __und_invalid @ 1 (FIQ_26 / FIQ_32) - .long __und_invalid @ 2 (IRQ_26 / IRQ_32) - .long __und_svc @ 3 (SVC_26 / SVC_32) - .long __und_invalid @ 4 - .long __und_invalid @ 5 - .long __und_invalid @ 6 - .long __und_invalid @ 7 - .long __und_invalid @ 8 - .long __und_invalid @ 9 - .long __und_invalid @ a - .long __und_invalid @ b - .long __und_invalid @ c - .long __und_invalid @ d - .long __und_invalid @ e - .long __und_invalid @ f - - .align 5 - -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit data mode). 
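Each vector_stub expansion above is followed by a 16-entry table indexed by the low four bits of the interrupted mode's SPSR; only the USR (0) and SVC (3) slots point at real handlers, and everything else lands in an __xxx_invalid stub. The dispatch pattern in C, with hypothetical dabt_* names standing in for the __dabt_usr/__dabt_svc/__dabt_invalid entry points:

typedef void (*vec_handler_t)(void);

extern void dabt_usr(void), dabt_svc(void), dabt_invalid(void);

static vec_handler_t dabt_table[16] = {
        [0] = dabt_usr,       /* USR_32 */
        [3] = dabt_svc,       /* SVC_32 */
        [1] = dabt_invalid,  [2] = dabt_invalid,  [4] = dabt_invalid,
        [5] = dabt_invalid,  [6] = dabt_invalid,  [7] = dabt_invalid,
        [8] = dabt_invalid,  [9] = dabt_invalid,  [10] = dabt_invalid,
        [11] = dabt_invalid, [12] = dabt_invalid, [13] = dabt_invalid,
        [14] = dabt_invalid, [15] = dabt_invalid,
};

static void dispatch_dabt(unsigned int spsr)
{
        dabt_table[spsr & 0x0f]();   /* "and lr, lr, #0x0f" in the stub */
}

The FIQ table below differs only in routing almost every mode to __fiq_svc.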
- */ - -vector_addrexcptn: - b vector_addrexcptn - -/*============================================================================= - * FIQ "NMI" handler - *----------------------------------------------------------------------------- - * Handle a FIQ using the SVC stack allowing FIQ to act like NMI on x86 - * systems. - */ - vector_stub fiq, FIQ_MODE, 4 - - .long __fiq_usr @ 0 (USR_26 / USR_32) - .long __fiq_svc @ 1 (FIQ_26 / FIQ_32) - .long __fiq_svc @ 2 (IRQ_26 / IRQ_32) - .long __fiq_svc @ 3 (SVC_26 / SVC_32) - .long __fiq_svc @ 4 - .long __fiq_svc @ 5 - .long __fiq_svc @ 6 - .long __fiq_abt @ 7 - .long __fiq_svc @ 8 - .long __fiq_svc @ 9 - .long __fiq_svc @ a - .long __fiq_svc @ b - .long __fiq_svc @ c - .long __fiq_svc @ d - .long __fiq_svc @ e - .long __fiq_svc @ f - - .globl vector_fiq - - .section .vectors, "ax", %progbits -.L__vectors_start: - W(b) vector_rst - W(b) vector_und - W(ldr) pc, .L__vectors_start + 0x1000 - W(b) vector_pabt - W(b) vector_dabt - W(b) vector_addrexcptn - W(b) vector_irq - W(b) vector_fiq - - .data - .align 2 - - .globl cr_alignment -cr_alignment: - .space 4 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S deleted file mode 100644 index 271cb8a1eba1eefe4469db9ae5a677385fa8c2cc..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-common.S +++ /dev/null @@ -1,460 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-common.S - * - * Copyright (C) 2000 Russell King - */ - -#include <asm/assembler.h> -#include <asm/unistd.h> -#include <asm/ftrace.h> -#include <asm/unwind.h> -#include <asm/memory.h> -#ifdef CONFIG_AEABI -#include <asm/unistd-oabi.h> -#endif - - .equ NR_syscalls, __NR_syscalls - -#ifdef CONFIG_NEED_RET_TO_USER -#include <mach/entry-macro.S> -#else - .macro arch_ret_to_user, tmp1, tmp2 - .endm -#endif - -#include "entry-header.S" - -saved_psr .req r8 -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) -saved_pc .req r9 -#define TRACE(x...) x -#else -saved_pc .req lr -#define TRACE(x...) -#endif - - .section .entry.text,"ax",%progbits - .align 5 -#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \ - IS_ENABLED(CONFIG_DEBUG_RSEQ)) -/* - * This is the fast syscall return path. We do as little as possible here, - * such as avoiding writing r0 to the stack. We only use this path if we - * have tracing, context tracking and rseq debug disabled - the overheads - * from those features make this path too inefficient. - */ -ret_fast_syscall: -__ret_fast_syscall: - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK - bne fast_work_pending - - - /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr - - restore_user_regs fast = 1, offset = S_OFF - UNWIND(.fnend ) -ENDPROC(ret_fast_syscall) - - /* Ok, we need to do extra processing, enter the slow path. */ -fast_work_pending: - str r0, [sp, #S_R0+S_OFF]! @ returned r0 - /* fall through to work_pending */ -#else -/* - * The "replacement" ret_fast_syscall for when tracing, context tracking, - * or rseq debug is enabled. As we will need to call out to some C functions, - * we save r0 first to avoid needing to save registers around each C function - * call. - */ -ret_fast_syscall: -__ret_fast_syscall: - UNWIND(.fnstart ) - UNWIND(.cantunwind ) - str r0, [sp, #S_R0 + S_OFF]!
@ save returned r0 -#if IS_ENABLED(CONFIG_DEBUG_RSEQ) - /* do_rseq_syscall needs interrupts enabled. */ - mov r0, sp @ 'regs' - bl do_rseq_syscall -#endif - disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK - beq no_work_pending - UNWIND(.fnend ) -ENDPROC(ret_fast_syscall) - - /* Slower path - fall through to work_pending */ -#endif - - tst r1, #_TIF_SYSCALL_WORK - bne __sys_trace_return_nosave -slow_work_pending: - mov r0, sp @ 'regs' - mov r2, why @ 'syscall' - bl do_work_pending - cmp r0, #0 - beq no_work_pending - movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) - ldmia sp, {r0 - r6} @ have to reload r0 - r6 - b local_restart @ ... and off we go -ENDPROC(ret_fast_syscall) - -/* - * "slow" syscall return path. "why" tells us if this was a real syscall. - * IRQs may be enabled here, so always disable them. Note that we use the - * "notrace" version to avoid calling into the tracing code unnecessarily. - * do_work_pending() will update this state if necessary. - */ -ENTRY(ret_to_user) -ret_slow_syscall: -#if IS_ENABLED(CONFIG_DEBUG_RSEQ) - /* do_rseq_syscall needs interrupts enabled. */ - enable_irq_notrace @ enable interrupts - mov r0, sp @ 'regs' - bl do_rseq_syscall -#endif - disable_irq_notrace @ disable interrupts -ENTRY(ret_to_user_from_irq) - ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] - tst r1, #_TIF_WORK_MASK - bne slow_work_pending -no_work_pending: - asm_trace_hardirqs_on save = 0 - - /* perform architecture specific actions before user return */ - arch_ret_to_user r1, lr - ct_user_enter save = 0 - - restore_user_regs fast = 0, offset = 0 -ENDPROC(ret_to_user_from_irq) -ENDPROC(ret_to_user) - -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cmp r5, #0 - movne r0, r4 - badrne lr, 1f - retne r5 -1: get_thread_info tsk - b ret_slow_syscall -ENDPROC(ret_from_fork) - -/*============================================================================= - * SWI handler - *----------------------------------------------------------------------------- - */ - - .align 5 -ENTRY(vector_swi) -#ifdef CONFIG_CPU_V7M - v7m_exception_entry -#else - sub sp, sp, #PT_REGS_SIZE - stmia sp, {r0 - r12} @ Calling r0 - r12 - ARM( add r8, sp, #S_PC ) - ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr - THUMB( mov r8, sp ) - THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr - mrs saved_psr, spsr @ called from non-FIQ mode, so ok. - TRACE( mov saved_pc, lr ) - str saved_pc, [sp, #S_PC] @ Save calling PC - str saved_psr, [sp, #S_PSR] @ Save CPSR - str r0, [sp, #S_OLD_R0] @ Save OLD_R0 -#endif - zero_fp - alignment_trap r10, ip, __cr_alignment - asm_trace_hardirqs_on save=0 - enable_irq_notrace - ct_user_exit save=0 - - /* - * Get the system call number. - */ - -#if defined(CONFIG_OABI_COMPAT) - - /* - * If we have CONFIG_OABI_COMPAT then we need to look at the swi - * value to determine if it is an EABI or an old ABI call. - */ -#ifdef CONFIG_ARM_THUMB - tst saved_psr, #PSR_T_BIT - movne r10, #0 @ no thumb OABI emulation - USER( ldreq r10, [saved_pc, #-4] ) @ get SWI instruction -#else - USER( ldr r10, [saved_pc, #-4] ) @ get SWI instruction -#endif - ARM_BE8(rev r10, r10) @ little endian instruction - -#elif defined(CONFIG_AEABI) - - /* - * Pure EABI user space always put syscall number into scno (r7). 
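A hedged C summary of the ABI cases above; syscall_nr() is a hypothetical helper, and 0x900000 is the legacy OABI __NR_SYSCALL_BASE value. With EABI the number simply arrives in r7; with the old ABI it is encoded in the low 24 bits of the SWI instruction, which the handler reads back from saved_pc - 4.

#define NR_SYSCALL_BASE_OABI	0x900000u	/* assumed legacy base */

static unsigned int syscall_nr(unsigned int r7, unsigned int swi_insn,
			       int is_eabi)
{
	if (is_eabi)
		return r7;		/* EABI: number lives in r7 */
	/* legacy: mask off the SWI opcode byte, then strip the OS base */
	return (swi_insn & 0x00ffffff) ^ NR_SYSCALL_BASE_OABI;
}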
- */
-#elif defined(CONFIG_ARM_THUMB)
-	/* Legacy ABI only, possibly thumb mode. */
-	tst	saved_psr, #PSR_T_BIT		@ this is SPSR from save_user_regs
-	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
- USER(	ldreq	scno, [saved_pc, #-4]	)
-
-#else
-	/* Legacy ABI only. */
- USER(	ldr	scno, [saved_pc, #-4]	)	@ get SWI instruction
-#endif
-
-	/* saved_psr and saved_pc are now dead */
-
-	uaccess_disable tbl
-
-	adr	tbl, sys_call_table		@ load syscall table pointer
-
-#if defined(CONFIG_OABI_COMPAT)
-	/*
-	 * If the swi argument is zero, this is an EABI call and we do nothing.
-	 *
-	 * If this is an old ABI call, get the syscall number into scno and
-	 * get the old ABI syscall table address.
-	 */
-	bics	r10, r10, #0xff000000
-	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
-	ldrne	tbl, =sys_oabi_call_table
-#elif !defined(CONFIG_AEABI)
-	bic	scno, scno, #0xff000000		@ mask off SWI op-code
-	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
-#endif
-	get_thread_info tsk
-	/*
-	 * Reload the registers that may have been corrupted on entry to
-	 * the syscall assembly (by tracing or context tracking).
-	 */
- TRACE(	ldmia	sp, {r0 - r3}	)
-
-local_restart:
-	ldr	r10, [tsk, #TI_FLAGS]		@ check for syscall tracing
-	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
-
-	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
-	bne	__sys_trace
-
-	invoke_syscall tbl, scno, r10, __ret_fast_syscall
-
-	add	r1, sp, #S_OFF
-2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
-	eor	r0, scno, #__NR_SYSCALL_BASE	@ put OS number back
-	bcs	arm_syscall
-	mov	why, #0				@ no longer a real syscall
-	b	sys_ni_syscall			@ not private func
-
-#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
-	/*
-	 * We failed to handle a fault trying to access the page
-	 * containing the swi instruction, but we're not really in a
-	 * position to return -EFAULT. Instead, return back to the
-	 * instruction and re-enter the user fault handling path trying
-	 * to page it in. This will likely result in sending SEGV to the
-	 * current task.
-	 */
-9001:
-	sub	lr, saved_pc, #4
-	str	lr, [sp, #S_PC]
-	get_thread_info tsk
-	b	ret_fast_syscall
-#endif
-ENDPROC(vector_swi)
-
-	/*
-	 * This is the really slow path.  We're going to be doing
-	 * context switches, and waiting for our parent to respond.
-	 */
-__sys_trace:
-	mov	r1, scno
-	add	r0, sp, #S_OFF
-	bl	syscall_trace_enter
-	mov	scno, r0
-	invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1
-	cmp	scno, #-1			@ skip the syscall?
-	bne	2b
-	add	sp, sp, #S_OFF			@ restore stack
-
-__sys_trace_return_nosave:
-	enable_irq_notrace
-	mov	r0, sp
-	bl	syscall_trace_exit
-	b	ret_slow_syscall
-
-__sys_trace_return:
-	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
-	mov	r0, sp
-	bl	syscall_trace_exit
-	b	ret_slow_syscall
-
-	.align	5
-#ifdef CONFIG_ALIGNMENT_TRAP
-	.type	__cr_alignment, #object
-__cr_alignment:
-	.word	cr_alignment
-#endif
-	.ltorg
-
-	.macro	syscall_table_start, sym
-	.equ	__sys_nr, 0
-	.type	\sym, #object
-ENTRY(\sym)
-	.endm
-
-	.macro	syscall, nr, func
-	.ifgt	__sys_nr - \nr
-	.error	"Duplicated/unordered system call entry"
-	.endif
-	.rept	\nr - __sys_nr
-	.long	sys_ni_syscall
-	.endr
-	.long	\func
-	.equ	__sys_nr, \nr + 1
-	.endm
-
-	.macro	syscall_table_end, sym
-	.ifgt	__sys_nr - __NR_syscalls
-	.error	"System call table too big"
-	.endif
-	.rept	__NR_syscalls - __sys_nr
-	.long	sys_ni_syscall
-	.endr
-	.size	\sym, . - \sym
-	.endm
-
-#define NATIVE(nr, func) syscall nr, func
-
-/*
- * This is the syscall table declaration for native ABI syscalls.
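A hedged C illustration of what the syscall/syscall_table_end macros above emit: every gap between consecutive syscall numbers is padded with sys_ni_syscall, so the finished table can be indexed directly by syscall number. build_table() is illustrative, not a kernel function.

#include <stddef.h>

long sys_ni_syscall(void);	/* the "not implemented" fallback */

struct syscall_entry {
	size_t nr;
	long (*fn)(void);
};

/* Fill every slot with the fallback, then patch in the real entries. */
static void build_table(long (*tbl[])(void), size_t nr_slots,
			const struct syscall_entry *calls, size_t n)
{
	for (size_t i = 0; i < nr_slots; i++)
		tbl[i] = sys_ni_syscall;
	for (size_t i = 0; i < n; i++)
		tbl[calls[i].nr] = calls[i].fn;
}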
- * With EABI a couple syscalls are obsolete and defined as sys_ni_syscall. - */ - syscall_table_start sys_call_table -#define COMPAT(nr, native, compat) syscall nr, native -#ifdef CONFIG_AEABI -#include -#else -#include -#endif -#undef COMPAT - syscall_table_end sys_call_table - -/*============================================================================ - * Special system call wrappers - */ -@ r0 = syscall number -@ r8 = syscall table -sys_syscall: - bic scno, r0, #__NR_OABI_SYSCALL_BASE - cmp scno, #__NR_syscall - __NR_SYSCALL_BASE - cmpne scno, #NR_syscalls @ check range -#ifdef CONFIG_CPU_SPECTRE - movhs scno, #0 - csdb -#endif - stmialo sp, {r5, r6} @ shuffle args - movlo r0, r1 - movlo r1, r2 - movlo r2, r3 - movlo r3, r4 - ldrlo pc, [tbl, scno, lsl #2] - b sys_ni_syscall -ENDPROC(sys_syscall) - -sys_sigreturn_wrapper: - add r0, sp, #S_OFF - mov why, #0 @ prevent syscall restart handling - b sys_sigreturn -ENDPROC(sys_sigreturn_wrapper) - -sys_rt_sigreturn_wrapper: - add r0, sp, #S_OFF - mov why, #0 @ prevent syscall restart handling - b sys_rt_sigreturn -ENDPROC(sys_rt_sigreturn_wrapper) - -sys_statfs64_wrapper: - teq r1, #88 - moveq r1, #84 - b sys_statfs64 -ENDPROC(sys_statfs64_wrapper) - -sys_fstatfs64_wrapper: - teq r1, #88 - moveq r1, #84 - b sys_fstatfs64 -ENDPROC(sys_fstatfs64_wrapper) - -/* - * Note: off_4k (r5) is always units of 4K. If we can't do the requested - * offset, we return EINVAL. - */ -sys_mmap2: - str r5, [sp, #4] - b sys_mmap_pgoff -ENDPROC(sys_mmap2) - -#ifdef CONFIG_OABI_COMPAT - -/* - * These are syscalls with argument register differences - */ - -sys_oabi_pread64: - stmia sp, {r3, r4} - b sys_pread64 -ENDPROC(sys_oabi_pread64) - -sys_oabi_pwrite64: - stmia sp, {r3, r4} - b sys_pwrite64 -ENDPROC(sys_oabi_pwrite64) - -sys_oabi_truncate64: - mov r3, r2 - mov r2, r1 - b sys_truncate64 -ENDPROC(sys_oabi_truncate64) - -sys_oabi_ftruncate64: - mov r3, r2 - mov r2, r1 - b sys_ftruncate64 -ENDPROC(sys_oabi_ftruncate64) - -sys_oabi_readahead: - str r3, [sp] - mov r3, r2 - mov r2, r1 - b sys_readahead -ENDPROC(sys_oabi_readahead) - -/* - * Let's declare a second syscall table for old ABI binaries - * using the compatibility syscall entries. - */ - syscall_table_start sys_oabi_call_table -#define COMPAT(nr, native, compat) syscall nr, compat -#include - syscall_table_end sys_oabi_call_table - -#endif - diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S deleted file mode 100644 index a74289ebc803699955155b4f31bd387c8f23b9bd..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-ftrace.S +++ /dev/null @@ -1,272 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#include -#include -#include - -#include "entry-header.S" - -/* - * When compiling with -pg, gcc inserts a call to the mcount routine at the - * start of every function. In mcount, apart from the function's address (in - * lr), we need to get hold of the function's caller's address. - * - * Newer GCCs (4.4+) solve this problem by using a version of mcount with call - * sites like: - * - * push {lr} - * bl __gnu_mcount_nc - * - * With these compilers, frame pointers are not necessary. - * - * mcount can be thought of as a function called in the middle of a subroutine - * call. As such, it needs to be transparent for both the caller and the - * callee: the original lr needs to be restored when leaving mcount, and no - * registers should be clobbered. (In the __gnu_mcount_nc implementation, we - * clobber the ip register. 
This is OK because the ARM calling convention - * allows it to be clobbered in subroutines and doesn't use it to hold - * parameters.) - * - * When using dynamic ftrace, we patch out the mcount call by a "pop {lr}" - * instead of the __gnu_mcount_nc call (see arch/arm/kernel/ftrace.c). - */ - -.macro mcount_adjust_addr rd, rn - bic \rd, \rn, #1 @ clear the Thumb bit if present - sub \rd, \rd, #MCOUNT_INSN_SIZE -.endm - -.macro __mcount suffix - mcount_enter - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne 1f - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr r1, =ftrace_graph_return - ldr r2, [r1] - cmp r0, r2 - bne ftrace_graph_caller\suffix - - ldr r1, =ftrace_graph_entry - ldr r2, [r1] - ldr r0, =ftrace_graph_entry_stub - cmp r0, r2 - bne ftrace_graph_caller\suffix -#endif - - mcount_exit - -1: mcount_get_lr r1 @ lr of instrumented func - mcount_adjust_addr r0, lr @ instrumented function - badr lr, 2f - mov pc, r2 -2: mcount_exit -.endm - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - -.macro __ftrace_regs_caller - - sub sp, sp, #8 @ space for PC and CPSR OLD_R0, - @ OLD_R0 will overwrite previous LR - - add ip, sp, #12 @ move in IP the value of SP as it was - @ before the push {lr} of the mcount mechanism - - str lr, [sp, #0] @ store LR instead of PC - - ldr lr, [sp, #8] @ get previous LR - - str r0, [sp, #8] @ write r0 as OLD_R0 over previous LR - - stmdb sp!, {ip, lr} - stmdb sp!, {r0-r11, lr} - - @ stack content at this point: - @ 0 4 48 52 56 60 64 68 72 - @ R0 | R1 | ... | LR | SP + 4 | previous LR | LR | PSR | OLD_R0 | - - mov r3, sp @ struct pt_regs* - - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current - @ function tracing op - - ldr r1, [sp, #S_LR] @ lr of instrumented func - - ldr lr, [sp, #S_PC] @ get LR - - mcount_adjust_addr r0, lr @ instrumented function - - .globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_regs_call -ftrace_graph_regs_call: - mov r0, r0 -#endif - - @ pop saved regs - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return -.endm - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.macro __ftrace_graph_regs_caller - - sub r0, fp, #4 @ lr of instrumented routine (parent) - - @ called from __ftrace_regs_caller - ldr r1, [sp, #S_PC] @ instrumented routine (func) - mcount_adjust_addr r1, r1 - - mov r2, fp @ frame pointer - bl prepare_ftrace_return - - @ pop registers saved in ftrace_regs_caller - ldmia sp!, {r0-r12} @ restore r0 through r12 - ldr ip, [sp, #8] @ restore PC - ldr lr, [sp, #4] @ restore LR - ldr sp, [sp, #0] @ restore SP - mov pc, ip @ return - -.endm -#endif -#endif - -.macro __ftrace_caller suffix - mcount_enter - - mcount_get_lr r1 @ lr of instrumented func - mcount_adjust_addr r0, lr @ instrumented function - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ldr r2, =function_trace_op - ldr r2, [r2] @ pointer to the current - @ function tracing op - mov r3, #0 @ regs is NULL -#endif - - .globl ftrace_call\suffix -ftrace_call\suffix: - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_call\suffix -ftrace_graph_call\suffix: - mov r0, r0 -#endif - - mcount_exit -.endm - -.macro __ftrace_graph_caller - sub r0, fp, #4 @ &lr of instrumented routine (&parent) -#ifdef CONFIG_DYNAMIC_FTRACE - @ called from __ftrace_caller, saved in mcount_enter - ldr r1, [sp, #16] @ instrumented routine (func) - mcount_adjust_addr r1, r1 
-#else - @ called from __mcount, untouched in lr - mcount_adjust_addr r1, lr @ instrumented routine (func) -#endif - mov r2, fp @ frame pointer - bl prepare_ftrace_return - mcount_exit -.endm - -/* - * __gnu_mcount_nc - */ - -.macro mcount_enter -/* - * This pad compensates for the push {lr} at the call site. Note that we are - * unable to unwind through a function which does not otherwise save its lr. - */ - UNWIND(.pad #4) - stmdb sp!, {r0-r3, lr} - UNWIND(.save {r0-r3, lr}) -.endm - -.macro mcount_get_lr reg - ldr \reg, [sp, #20] -.endm - -.macro mcount_exit - ldmia sp!, {r0-r3, ip, lr} - ret ip -.endm - -ENTRY(__gnu_mcount_nc) -UNWIND(.fnstart) -#ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} - ret ip -#else - __mcount -#endif -UNWIND(.fnend) -ENDPROC(__gnu_mcount_nc) - -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(ftrace_caller) -UNWIND(.fnstart) - __ftrace_caller -UNWIND(.fnend) -ENDPROC(ftrace_caller) - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_regs_caller) -UNWIND(.fnstart) - __ftrace_regs_caller -UNWIND(.fnend) -ENDPROC(ftrace_regs_caller) -#endif - -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) -UNWIND(.fnstart) - __ftrace_graph_caller -UNWIND(.fnend) -ENDPROC(ftrace_graph_caller) - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -ENTRY(ftrace_graph_regs_caller) -UNWIND(.fnstart) - __ftrace_graph_regs_caller -UNWIND(.fnend) -ENDPROC(ftrace_graph_regs_caller) -#endif -#endif - -.purgem mcount_enter -.purgem mcount_get_lr -.purgem mcount_exit - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: - stmdb sp!, {r0-r3} - mov r0, fp @ frame pointer - bl ftrace_return_to_handler - mov lr, r0 @ r0 has real ret addr - ldmia sp!, {r0-r3} - ret lr -#endif - -ENTRY(ftrace_stub) -.Lftrace_stub: - ret lr -ENDPROC(ftrace_stub) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S deleted file mode 100644 index 40db0f9188b69e9e4323405c8c44e9a3fe153890..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-header.S +++ /dev/null @@ -1,417 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#include -#include -#include -#include -#include -#include - -@ Bad Abort numbers -@ ----------------- -@ -#define BAD_PREFETCH 0 -#define BAD_DATA 1 -#define BAD_ADDREXCPTN 2 -#define BAD_IRQ 3 -#define BAD_UNDEFINSTR 4 - -@ -@ Most of the stack format comes from struct pt_regs, but with -@ the addition of 8 bytes for storing syscall args 5 and 6. -@ This _must_ remain a multiple of 8 for EABI. -@ -#define S_OFF 8 - -/* - * The SWI code relies on the fact that R0 is at the bottom of the stack - * (due to slow/fast restore user regs). - */ -#if S_R0 != 0 -#error "Please fix" -#endif - - .macro zero_fp -#ifdef CONFIG_FRAME_POINTER - mov fp, #0 -#endif - .endm - -#ifdef CONFIG_ALIGNMENT_TRAP -#define ATRAP(x...) x -#else -#define ATRAP(x...) -#endif - - .macro alignment_trap, rtmp1, rtmp2, label -#ifdef CONFIG_ALIGNMENT_TRAP - mrc p15, 0, \rtmp2, c1, c0, 0 - ldr \rtmp1, \label - ldr \rtmp1, [\rtmp1] - teq \rtmp1, \rtmp2 - mcrne p15, 0, \rtmp1, c1, c0, 0 -#endif - .endm - -#ifdef CONFIG_CPU_V7M -/* - * ARMv7-M exception entry/exit macros. - * - * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are - * automatically saved on the current stack (32 words) before - * switching to the exception stack (SP_main). - * - * If exception is taken while in user mode, SP_main is - * empty. Otherwise, SP_main is aligned to 64 bit automatically - * (CCR.STKALIGN set). 
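A hedged sketch of the hardware-stacked ARMv7-M exception frame referred to above: eight words (32 bytes) pushed automatically by the core before the handler runs, lowest address first.

struct v7m_hw_frame {
	unsigned long r0, r1, r2, r3;
	unsigned long r12;
	unsigned long lr;	/* R14 at the point of the exception */
	unsigned long ret_addr;	/* ReturnAddress() */
	unsigned long xpsr;
};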
- * - * Linux assumes that the interrupts are disabled when entering an - * exception handler and it may BUG if this is not the case. Interrupts - * are disabled during entry and reenabled in the exit macro. - * - * v7m_exception_slow_exit is used when returning from SVC or PendSV. - * When returning to kernel mode, we don't return from exception. - */ - .macro v7m_exception_entry - @ determine the location of the registers saved by the core during - @ exception entry. Depending on the mode the cpu was in when the - @ exception happend that is either on the main or the process stack. - @ Bit 2 of EXC_RETURN stored in the lr register specifies which stack - @ was used. - tst lr, #EXC_RET_STACK_MASK - mrsne r12, psp - moveq r12, sp - - @ we cannot rely on r0-r3 and r12 matching the value saved in the - @ exception frame because of tail-chaining. So these have to be - @ reloaded. - ldmia r12!, {r0-r3} - - @ Linux expects to have irqs off. Do it here before taking stack space - cpsid i - - sub sp, #PT_REGS_SIZE-S_IP - stmdb sp!, {r0-r11} - - @ load saved r12, lr, return address and xPSR. - @ r0-r7 are used for signals and never touched from now on. Clobbering - @ r8-r12 is OK. - mov r9, r12 - ldmia r9!, {r8, r10-r12} - - @ calculate the original stack pointer value. - @ r9 currently points to the memory location just above the auto saved - @ xPSR. - @ The cpu might automatically 8-byte align the stack. Bit 9 - @ of the saved xPSR specifies if stack aligning took place. In this case - @ another 32-bit value is included in the stack. - - tst r12, V7M_xPSR_FRAMEPTRALIGN - addne r9, r9, #4 - - @ store saved r12 using str to have a register to hold the base for stm - str r8, [sp, #S_IP] - add r8, sp, #S_SP - @ store r13-r15, xPSR - stmia r8!, {r9-r12} - @ store old_r0 - str r0, [r8] - .endm - - /* - * PENDSV and SVCALL are configured to have the same exception - * priorities. As a kernel thread runs at SVCALL execution priority it - * can never be preempted and so we will never have to return to a - * kernel thread here. - */ - .macro v7m_exception_slow_exit ret_r0 - cpsid i - ldr lr, =exc_ret - ldr lr, [lr] - - @ read original r12, sp, lr, pc and xPSR - add r12, sp, #S_IP - ldmia r12, {r1-r5} - - @ an exception frame is always 8-byte aligned. To tell the hardware if - @ the sp to be restored is aligned or not set bit 9 of the saved xPSR - @ accordingly. - tst r2, #4 - subne r2, r2, #4 - orrne r5, V7M_xPSR_FRAMEPTRALIGN - biceq r5, V7M_xPSR_FRAMEPTRALIGN - - @ ensure bit 0 is cleared in the PC, otherwise behaviour is - @ unpredictable - bic r4, #1 - - @ write basic exception frame - stmdb r2!, {r1, r3-r5} - ldmia sp, {r1, r3-r5} - .if \ret_r0 - stmdb r2!, {r0, r3-r5} - .else - stmdb r2!, {r1, r3-r5} - .endif - - @ restore process sp - msr psp, r2 - - @ restore original r4-r11 - ldmia sp!, {r0-r11} - - @ restore main sp - add sp, sp, #PT_REGS_SIZE-S_IP - - cpsie i - bx lr - .endm -#endif /* CONFIG_CPU_V7M */ - - @ - @ Store/load the USER SP and LR registers by switching to the SYS - @ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not - @ available. 
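A minimal C model of the alignment fix-up in v7m_exception_entry above, assuming the usual bit-9 meaning of the saved xPSR; original_sp() is illustrative only. If the bit is set, the core inserted one pad word to 8-byte-align the frame, so the pre-exception SP sits one word higher than the end of the frame.

#define XPSR_FRAMEPTRALIGN (1UL << 9)	/* V7M_xPSR_FRAMEPTRALIGN */

static unsigned long original_sp(unsigned long frame_end, unsigned long xpsr)
{
	return frame_end + ((xpsr & XPSR_FRAMEPTRALIGN) ? 4 : 0);
}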
Should only be called from SVC mode - @ - .macro store_user_sp_lr, rd, rtemp, offset = 0 - mrs \rtemp, cpsr - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch to the SYS mode - - str sp, [\rd, #\offset] @ save sp_usr - str lr, [\rd, #\offset + 4] @ save lr_usr - - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch back to the SVC mode - .endm - - .macro load_user_sp_lr, rd, rtemp, offset = 0 - mrs \rtemp, cpsr - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch to the SYS mode - - ldr sp, [\rd, #\offset] @ load sp_usr - ldr lr, [\rd, #\offset + 4] @ load lr_usr - - eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) - msr cpsr_c, \rtemp @ switch back to the SVC mode - .endm - - - .macro svc_exit, rpsr, irq = 0 - .if \irq != 0 - @ IRQs already off -#ifdef CONFIG_TRACE_IRQFLAGS - @ The parent context IRQs must have been enabled to get here in - @ the first place, so there's no point checking the PSR I bit. - bl trace_hardirqs_on -#endif - .else - @ IRQs off again before pulling preserved data off the stack - disable_irq_notrace -#ifdef CONFIG_TRACE_IRQFLAGS - tst \rpsr, #PSR_I_BIT - bleq trace_hardirqs_on - tst \rpsr, #PSR_I_BIT - blne trace_hardirqs_off -#endif - .endif - uaccess_exit tsk, r0, r1 - -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode SVC restore - msr spsr_cxsf, \rpsr -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) - @ We must avoid clrex due to Cortex-A15 erratum #830321 - sub r0, sp, #4 @ uninhabited address - strex r1, r2, [r0] @ clear the exclusive monitor -#endif - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -#else - @ Thumb mode SVC restore - ldr lr, [sp, #S_SP] @ top of the stack - ldrd r0, r1, [sp, #S_LR] @ calling lr and pc - - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r2, r1, [sp, #S_LR] @ clear the exclusive monitor - - stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context - ldmia sp, {r0 - r12} - mov sp, lr - ldr lr, [sp], #4 - rfeia sp! -#endif - .endm - - @ - @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit - @ - @ This macro acts in a similar manner to svc_exit but switches to FIQ - @ mode to restore the final part of the register state. - @ - @ We cannot use the normal svc_exit procedure because that would - @ clobber spsr_svc (FIQ could be delivered during the first few - @ instructions of vector_swi meaning its contents have not been - @ saved anywhere). - @ - @ Note that, unlike svc_exit, this macro also does not allow a caller - @ supplied rpsr. This is because the FIQ exceptions are not re-entrant - @ and the handlers cannot call into the scheduler (meaning the value - @ on the stack remains correct). 
- @ - .macro svc_exit_via_fiq - uaccess_exit tsk, r0, r1 -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode restore - mov r0, sp - ldmib r0, {r1 - r14} @ abort is deadly from here onward (it will - @ clobber state restored below) - msr cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT - add r8, r0, #S_PC - ldr r9, [r0, #S_PSR] - msr spsr_cxsf, r9 - ldr r0, [r0, #S_R0] - ldmia r8, {pc}^ -#else - @ Thumb mode restore - add r0, sp, #S_R2 - ldr lr, [sp, #S_LR] - ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will - @ clobber state restored below) - ldmia r0, {r2 - r12} - mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT - msr cpsr_c, r1 - sub r0, #S_R2 - add r8, r0, #S_PC - ldmia r0, {r0 - r1} - rfeia r8 -#endif - .endm - - - .macro restore_user_regs, fast = 0, offset = 0 - uaccess_enable r1, isb=0 -#ifndef CONFIG_THUMB2_KERNEL - @ ARM mode restore - mov r2, sp - ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr - ldr lr, [r2, #\offset + S_PC]! @ get pc - tst r1, #PSR_I_BIT | 0x0f - bne 1f - msr spsr_cxsf, r1 @ save in spsr_svc -#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r1, r2, [r2] @ clear the exclusive monitor -#endif - .if \fast - ldmdb r2, {r1 - lr}^ @ get calling r1 - lr - .else - ldmdb r2, {r0 - lr}^ @ get calling r0 - lr - .endif - mov r0, r0 @ ARMv5T and earlier require a nop - @ after ldm {}^ - add sp, sp, #\offset + PT_REGS_SIZE - movs pc, lr @ return & move spsr_svc into cpsr -1: bug "Returning to usermode but unexpected PSR bits set?", \@ -#elif defined(CONFIG_CPU_V7M) - @ V7M restore. - @ Note that we don't need to do clrex here as clearing the local - @ monitor is part of the exception entry and exit sequence. - .if \offset - add sp, #\offset - .endif - v7m_exception_slow_exit ret_r0 = \fast -#else - @ Thumb mode restore - mov r2, sp - load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr - ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr - ldr lr, [sp, #\offset + S_PC] @ get pc - add sp, sp, #\offset + S_SP - tst r1, #PSR_I_BIT | 0x0f - bne 1f - msr spsr_cxsf, r1 @ save in spsr_svc - - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r1, r2, [sp] @ clear the exclusive monitor - - .if \fast - ldmdb sp, {r1 - r12} @ get calling r1 - r12 - .else - ldmdb sp, {r0 - r12} @ get calling r0 - r12 - .endif - add sp, sp, #PT_REGS_SIZE - S_SP - movs pc, lr @ return & move spsr_svc into cpsr -1: bug "Returning to usermode but unexpected PSR bits set?", \@ -#endif /* !CONFIG_THUMB2_KERNEL */ - .endm - -/* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. 
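The invoke_syscall macro below bounds the syscall number before the table load. A hedged C model of that Spectre-v1 clamp (do_syscall() is hypothetical): out-of-range numbers are forced to 0 so even speculative execution cannot index past the table, while architecturally the out-of-range case still falls through to the not-implemented path.

static long do_syscall(unsigned int nr, long (*const tbl[])(void),
		       unsigned int nr_syscalls)
{
	unsigned int idx = nr >= nr_syscalls ? 0 : nr;	/* movcs \tmp, #0 */

	/* the csdb speculation barrier sits here in the assembly */
	if (nr < nr_syscalls)
		return tbl[idx]();	/* ldrcc pc, [\table, \tmp, lsl #2] */
	return -38;			/* ENOSYS, the sys_ni_syscall path */
}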
- */ - .macro ct_user_exit, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING - .if \save - stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_exit - ldmia sp!, {r0-r3, ip, lr} - .else - bl context_tracking_user_exit - .endif -#endif - .endm - - .macro ct_user_enter, save = 1 -#ifdef CONFIG_CONTEXT_TRACKING - .if \save - stmdb sp!, {r0-r3, ip, lr} - bl context_tracking_user_enter - ldmia sp!, {r0-r3, ip, lr} - .else - bl context_tracking_user_enter - .endif -#endif - .endm - - .macro invoke_syscall, table, nr, tmp, ret, reload=0 -#ifdef CONFIG_CPU_SPECTRE - mov \tmp, \nr - cmp \tmp, #NR_syscalls @ check upper syscall limit - movcs \tmp, #0 - csdb - badr lr, \ret @ return address - .if \reload - add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmiacc r1, {r0 - r6} @ reload r0-r6 - stmiacc sp, {r4, r5} @ update stack arguments - .endif - ldrcc pc, [\table, \tmp, lsl #2] @ call sys_* routine -#else - cmp \nr, #NR_syscalls @ check upper syscall limit - badr lr, \ret @ return address - .if \reload - add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmiacc r1, {r0 - r6} @ reload r0-r6 - stmiacc sp, {r4, r5} @ update stack arguments - .endif - ldrcc pc, [\table, \nr, lsl #2] @ call sys_* routine -#endif - .endm - -/* - * These are the registers used in the syscall handler, and allow us to - * have in theory up to 7 arguments to a function - r0 to r6. - * - * r7 is reserved for the system call number for thumb mode. - * - * Note that tbl == why is intentional. - * - * We must set at least "tsk" and "why" when calling ret_with_reschedule. - */ -scno .req r7 @ syscall number -tbl .req r8 @ syscall table pointer -why .req r8 @ Linux syscall (!= 0) -tsk .req r9 @ current thread_info diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S deleted file mode 100644 index de1f20624be152f232cb32bdda391249500a1199..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/entry-v7m.S +++ /dev/null @@ -1,149 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/entry-v7m.S - * - * Copyright (C) 2008 ARM Ltd. - * - * Low-level vector interface routines for the ARMv7-M architecture - */ -#include -#include -#include -#include - -#include "entry-header.S" - -#ifdef CONFIG_TRACE_IRQFLAGS -#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation" -#endif - -__invalid_entry: - v7m_exception_entry -#ifdef CONFIG_PRINTK - adr r0, strerr - mrs r1, ipsr - mov r2, lr - bl printk -#endif - mov r0, sp - bl show_regs -1: b 1b -ENDPROC(__invalid_entry) - -strerr: .asciz "\nUnhandled exception: IPSR = %08lx LR = %08lx\n" - - .align 2 -__irq_entry: - v7m_exception_entry - - @ - @ Invoke the IRQ handler - @ - mrs r0, ipsr - ldr r1, =V7M_xPSR_EXCEPTIONNO - and r0, r1 - sub r0, #16 - mov r1, sp - stmdb sp!, {lr} - @ routine called with r0 = irq number, r1 = struct pt_regs * - bl nvic_handle_irq - - pop {lr} - @ - @ Check for any pending work if returning to user - @ - ldr r1, =BASEADDR_V7M_SCB - ldr r0, [r1, V7M_SCB_ICSR] - tst r0, V7M_SCB_ICSR_RETTOBASE - beq 2f - - get_thread_info tsk - ldr r2, [tsk, #TI_FLAGS] - tst r2, #_TIF_WORK_MASK - beq 2f @ no work pending - mov r0, #V7M_SCB_ICSR_PENDSVSET - str r0, [r1, V7M_SCB_ICSR] @ raise PendSV - -2: - @ registers r0-r3 and r12 are automatically restored on exception - @ return. r4-r7 were not clobbered in v7m_exception_entry so for - @ correctness they don't need to be restored. So only r8-r11 must be - @ restored here. The easiest way to do so is to restore r0-r7, too. 
-	ldmia	sp!, {r0-r11}
-	add	sp, #PT_REGS_SIZE-S_IP
-	cpsie	i
-	bx	lr
-ENDPROC(__irq_entry)
-
-__pendsv_entry:
-	v7m_exception_entry
-
-	ldr	r1, =BASEADDR_V7M_SCB
-	mov	r0, #V7M_SCB_ICSR_PENDSVCLR
-	str	r0, [r1, V7M_SCB_ICSR]	@ clear PendSV
-
-	@ execute the pending work, including reschedule
-	get_thread_info tsk
-	mov	why, #0
-	b	ret_to_user_from_irq
-ENDPROC(__pendsv_entry)
-
-/*
- * Register switch for ARMv7-M processors.
- * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
- * previous and next are guaranteed not to be the same.
- */
-ENTRY(__switch_to)
-	.fnstart
-	.cantunwind
-	add	ip, r1, #TI_CPU_SAVE
-	stmia	ip!, {r4 - r11}		@ Store most regs on stack
-	str	sp, [ip], #4
-	str	lr, [ip], #4
-	mov	r5, r0
-	add	r4, r2, #TI_CPU_SAVE
-	ldr	r0, =thread_notify_head
-	mov	r1, #THREAD_NOTIFY_SWITCH
-	bl	atomic_notifier_call_chain
-	mov	ip, r4
-	mov	r0, r5
-	ldmia	ip!, {r4 - r11}		@ Load all regs saved previously
-	ldr	sp, [ip]
-	ldr	pc, [ip, #4]!
-	.fnend
-ENDPROC(__switch_to)
-
-	.data
-#if CONFIG_CPU_V7M_NUM_IRQ <= 112
-	.align	9
-#else
-	.align	10
-#endif
-
-/*
- * Vector table (natural alignment needs to be ensured)
- */
-ENTRY(vector_table)
-	.long	0			@ 0 - Reset stack pointer
-	.long	__invalid_entry		@ 1 - Reset
-	.long	__invalid_entry		@ 2 - NMI
-	.long	__invalid_entry		@ 3 - HardFault
-	.long	__invalid_entry		@ 4 - MemManage
-	.long	__invalid_entry		@ 5 - BusFault
-	.long	__invalid_entry		@ 6 - UsageFault
-	.long	__invalid_entry		@ 7 - Reserved
-	.long	__invalid_entry		@ 8 - Reserved
-	.long	__invalid_entry		@ 9 - Reserved
-	.long	__invalid_entry		@ 10 - Reserved
-	.long	vector_swi		@ 11 - SVCall
-	.long	__invalid_entry		@ 12 - Debug Monitor
-	.long	__invalid_entry		@ 13 - Reserved
-	.long	__pendsv_entry		@ 14 - PendSV
-	.long	__invalid_entry		@ 15 - SysTick
-	.rept	CONFIG_CPU_V7M_NUM_IRQ
-	.long	__irq_entry		@ External Interrupts
-	.endr
-	.align	2
-	.globl	exc_ret
-exc_ret:
-	.space	4
diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S
deleted file mode 100644
index 8dd26e1a9bd69051a1548d99c157cad2e65ba81b..0000000000000000000000000000000000000000
--- a/arch/arm/kernel/fiqasm.S
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * linux/arch/arm/kernel/fiqasm.S
- *
- * Derived from code originally in linux/arch/arm/kernel/fiq.c:
- *
- * Copyright (C) 1998 Russell King
- * Copyright (C) 1998, 1999 Phil Blundell
- * Copyright (C) 2011, Linaro Limited
- *
- * FIQ support written by Philip Blundell, 1998.
- *
- * FIQ support re-written by Russell King to be more generic
- *
- * v7/Thumb-2 compatibility modifications by Linaro Limited, 2011.
- */
-
-#include
-#include
-
-/*
- * Taking an interrupt in FIQ mode is death, so both these functions
- * disable irqs for the duration.
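A hedged sketch of the seven-word register block that __set_fiq_regs/__get_fiq_regs below copy in and out: FIQ mode banks its own r8-r12, sp and lr, so the helpers briefly switch into FIQ mode (with both IRQ and FIQ masked) and move exactly these words.

struct fiq_banked_regs {
	unsigned long r8, r9, r10, r11, r12;
	unsigned long sp;	/* sp_fiq */
	unsigned long lr;	/* lr_fiq */
};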
- */ - -ENTRY(__set_fiq_regs) - mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE - mrs r1, cpsr - msr cpsr_c, r2 @ select FIQ mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ldmia r0!, {r8 - r12} - ldr sp, [r0], #4 - ldr lr, [r0] - msr cpsr_c, r1 @ return to SVC mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ret lr -ENDPROC(__set_fiq_regs) - -ENTRY(__get_fiq_regs) - mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE - mrs r1, cpsr - msr cpsr_c, r2 @ select FIQ mode - mov r0, r0 @ avoid hazard prior to ARMv4 - stmia r0!, {r8 - r12} - str sp, [r0], #4 - str lr, [r0] - msr cpsr_c, r1 @ return to SVC mode - mov r0, r0 @ avoid hazard prior to ARMv4 - ret lr -ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S deleted file mode 100644 index 4a3982812a401f1259909df4e1d05ead3f29dd9c..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head-common.S +++ /dev/null @@ -1,243 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head-common.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include - -#define ATAG_CORE 0x54410001 -#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) -#define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2) - -#ifdef CONFIG_CPU_BIG_ENDIAN -#define OF_DT_MAGIC 0xd00dfeed -#else -#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */ -#endif - -/* - * Exception handling. Something went wrong and we can't proceed. We - * ought to tell the user, but since we don't have any guarantee that - * we're even running on the right architecture, we do virtually nothing. - * - * If CONFIG_DEBUG_LL is set we try to print out something about the error - * and hope for the best (useful if bootloader fails to pass a proper - * machine ID for example). - */ - __HEAD - -/* Determine validity of the r2 atags pointer. The heuristic requires - * that the pointer be aligned, in the first 16k of physical RAM and - * that the ATAG_CORE marker is first and present. If CONFIG_OF_FLATTREE - * is selected, then it will also accept a dtb pointer. Future revisions - * of this function may be more lenient with the physical address and - * may also be able to move the ATAGS block if necessary. - * - * Returns: - * r2 either valid atags pointer, valid dtb pointer, or zero - * r5, r6 corrupted - */ -__vet_atags: - tst r2, #0x3 @ aligned? - bne 1f - - ldr r5, [r2, #0] -#ifdef CONFIG_OF_FLATTREE - ldr r6, =OF_DT_MAGIC @ is it a DTB? - cmp r5, r6 - beq 2f -#endif - cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE? - cmpne r5, #ATAG_CORE_SIZE_EMPTY - bne 1f - ldr r5, [r2, #4] - ldr r6, =ATAG_CORE - cmp r5, r6 - bne 1f - -2: ret lr @ atag/dtb pointer is ok - -1: mov r2, #0 - ret lr -ENDPROC(__vet_atags) - -/* - * The following fragment of code is executed with the MMU on in MMU mode, - * and uses absolute addresses; this is not position independent. 
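A hedged C rendering of the __vet_atags heuristic above; vet_atags() is illustrative, and for simplicity it checks the DTB magic in both byte orders where the real code compiles in a single endian-appropriate value.

#define ATAG_CORE_TAG		0x54410001u
#define ATAG_CORE_WORDS		5u	/* (2*4 + 3*4) >> 2 */
#define ATAG_CORE_WORDS_EMPTY	2u	/* (2*4) >> 2 */
#define FDT_MAGIC		0xd00dfeedu

static const unsigned int *vet_atags(const unsigned int *p)
{
	if ((unsigned long)p & 3)
		return 0;			/* must be 4-byte aligned */
	if (p[0] == FDT_MAGIC || p[0] == __builtin_bswap32(FDT_MAGIC))
		return p;			/* looks like a DTB */
	if ((p[0] == ATAG_CORE_WORDS || p[0] == ATAG_CORE_WORDS_EMPTY) &&
	    p[1] == ATAG_CORE_TAG)
		return p;			/* ATAG_CORE is first: ok */
	return 0;				/* reject; caller zeroes r2 */
}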
- * - * r0 = cp#15 control register (exc_ret for M-class) - * r1 = machine ID - * r2 = atags/dtb pointer - * r9 = processor ID - */ - __INIT -__mmap_switched: - - mov r7, r1 - mov r8, r2 - mov r10, r0 - - adr r4, __mmap_switched_data - mov fp, #0 - -#if defined(CONFIG_XIP_DEFLATED_DATA) - ARM( ldr sp, [r4], #4 ) - THUMB( ldr sp, [r4] ) - THUMB( add r4, #4 ) - bl __inflate_kernel_data @ decompress .data to RAM - teq r0, #0 - bne __error -#elif defined(CONFIG_XIP_KERNEL) - ARM( ldmia r4!, {r0, r1, r2, sp} ) - THUMB( ldmia r4!, {r0, r1, r2, r3} ) - THUMB( mov sp, r3 ) - sub r2, r2, r1 - bl memcpy @ copy .data to RAM -#endif - - ARM( ldmia r4!, {r0, r1, sp} ) - THUMB( ldmia r4!, {r0, r1, r3} ) - THUMB( mov sp, r3 ) - sub r2, r1, r0 - mov r1, #0 - bl memset @ clear .bss - - ldmia r4, {r0, r1, r2, r3} - str r9, [r0] @ Save processor ID - str r7, [r1] @ Save machine type - str r8, [r2] @ Save atags pointer - cmp r3, #0 - strne r10, [r3] @ Save control register values - mov lr, #0 - b start_kernel -ENDPROC(__mmap_switched) - - .align 2 - .type __mmap_switched_data, %object -__mmap_switched_data: -#ifdef CONFIG_XIP_KERNEL -#ifndef CONFIG_XIP_DEFLATED_DATA - .long _sdata @ r0 - .long __data_loc @ r1 - .long _edata_loc @ r2 -#endif - .long __bss_stop @ sp (temporary stack in .bss) -#endif - - .long __bss_start @ r0 - .long __bss_stop @ r1 - .long init_thread_union + THREAD_START_SP @ sp - - .long processor_id @ r0 - .long __machine_arch_type @ r1 - .long __atags_pointer @ r2 -#ifdef CONFIG_CPU_CP15 - .long cr_alignment @ r3 -#else -M_CLASS(.long exc_ret) @ r3 -AR_CLASS(.long 0) @ r3 -#endif - .size __mmap_switched_data, . - __mmap_switched_data - - __FINIT - .text - -/* - * This provides a C-API version of __lookup_processor_type - */ -ENTRY(lookup_processor_type) - stmfd sp!, {r4 - r6, r9, lr} - mov r9, r0 - bl __lookup_processor_type - mov r0, r5 - ldmfd sp!, {r4 - r6, r9, pc} -ENDPROC(lookup_processor_type) - -/* - * Read processor ID register (CP#15, CR0), and look up in the linker-built - * supported processor list. Note that we can't use the absolute addresses - * for the __proc_info lists since we aren't running with the MMU on - * (and therefore, we are not in the correct address space). We have to - * calculate the offset. - * - * r9 = cpuid - * Returns: - * r3, r4, r6 corrupted - * r5 = proc_info pointer in physical address space - * r9 = cpuid (preserved) - */ -__lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space -1: ldmia r5, {r3, r4} @ value, mask - and r4, r4, r9 @ mask wanted bits - teq r3, r4 - beq 2f - add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list) - cmp r5, r6 - blo 1b - mov r5, #0 @ unknown processor -2: ret lr -ENDPROC(__lookup_processor_type) - -/* - * Look in for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . 
- __lookup_processor_type_data - -__error_lpae: -#ifdef CONFIG_DEBUG_LL - adr r0, str_lpae - bl printascii - b __error -str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n" -#else - b __error -#endif - .align -ENDPROC(__error_lpae) - -__error_p: -#ifdef CONFIG_DEBUG_LL - adr r0, str_p1 - bl printascii - mov r0, r9 - bl printhex8 - adr r0, str_p2 - bl printascii - b __error -str_p1: .asciz "\nError: unrecognized/unsupported processor variant (0x" -str_p2: .asciz ").\n" - .align -#endif -ENDPROC(__error_p) - -__error: -#ifdef CONFIG_ARCH_RPC -/* - * Turn the screen red on a error - RiscPC only. - */ - mov r0, #0x02000000 - mov r3, #0x11 - orr r3, r3, r3, lsl #8 - orr r3, r3, r3, lsl #16 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 -#endif -1: mov r0, r0 - b 1b -ENDPROC(__error) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S deleted file mode 100644 index 0fc814bbc34b171e43e55a57b081220b15fe70a2..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head-nommu.S +++ /dev/null @@ -1,535 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head-nommu.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (C) 2003-2006 Hyok S. Choi - * - * Common kernel startup code (non-paged MM) - */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Kernel startup entry point. - * --------------------------- - * - * This is normally called from the decompressor code. The requirements - * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr. - * - * See linux/arch/arm/tools/mach-types for the complete list of machine - * numbers for r1. - * - */ - - __HEAD - -#ifdef CONFIG_CPU_THUMBONLY - .thumb -ENTRY(stext) -#else - .arm -ENTRY(stext) - - THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -#endif - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install -#endif - @ ensure svc mode and all interrupts masked - safe_svcmode_maskall r9 - @ and irqs disabled -#if defined(CONFIG_CPU_CP15) - mrc p15, 0, r9, c0, c0 @ get processor id -#elif defined(CONFIG_CPU_V7M) - ldr r9, =BASEADDR_V7M_SCB - ldr r9, [r9, V7M_SCB_CPUID] -#else - ldr r9, =CONFIG_PROCESSOR_ID -#endif - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor (r5=0)? - beq __error_p @ yes, error 'p' - -#ifdef CONFIG_ARM_MPU - bl __setup_mpu -#endif - - badr lr, 1f @ return (PIC) address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: ldr lr, =__mmap_switched - b __after_proc_init -ENDPROC(stext) - -#ifdef CONFIG_SMP - .text -ENTRY(secondary_startup) - /* - * Common entry point for secondary CPUs. - * - * Ensure that we're in SVC mode, and IRQs are disabled. Lookup - * the processor type - there is no need to check the machine type - * as it has already been validated by the primary processor. - */ -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r9 - -#ifndef CONFIG_CPU_CP15 - ldr r9, =CONFIG_PROCESSOR_ID -#else - mrc p15, 0, r9, c0, c0 @ get processor id -#endif - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor? 
- beq __error_p @ yes, error 'p' - - ldr r7, __secondary_data - -#ifdef CONFIG_ARM_MPU - bl __secondary_setup_mpu @ Initialize the MPU -#endif - - badr lr, 1f @ return (PIC) address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: bl __after_proc_init - ldr sp, [r7, #12] @ set up the stack pointer - mov fp, #0 - b secondary_start_kernel -ENDPROC(secondary_startup) - - .type __secondary_data, %object -__secondary_data: - .long secondary_data -#endif /* CONFIG_SMP */ - -/* - * Set the Control Register and Read the process ID. - */ - .text -__after_proc_init: -M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) -M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) -#ifdef CONFIG_ARM_MPU -M_CLASS(ldr r3, [r12, 0x50]) -AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0 - and r3, r3, #(MMFR0_PMSA) @ PMSA field - teq r3, #(MMFR0_PMSAv7) @ PMSA v7 - beq 1f - teq r3, #(MMFR0_PMSAv8) @ PMSA v8 - /* - * Memory region attributes for PMSAv8: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * NORMAL 001 11111111 - */ - ldreq r3, =PMSAv8_MAIR(0x00, PMSAv8_RGN_DEVICE_nGnRnE) | \ - PMSAv8_MAIR(0xff, PMSAv8_RGN_NORMAL) -AR_CLASS(mcreq p15, 0, r3, c10, c2, 0) @ MAIR 0 -M_CLASS(streq r3, [r12, #PMSAv8_MAIR0]) - moveq r3, #0 -AR_CLASS(mcreq p15, 0, r3, c10, c2, 1) @ MAIR 1 -M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) - -1: -#endif -#ifdef CONFIG_CPU_CP15 - /* - * CP15 system control register value returned in r0 from - * the CPU init function. - */ - -#ifdef CONFIG_ARM_MPU - biceq r0, r0, #CR_BR @ Disable the 'default mem-map' - orreq r0, r0, #CR_M @ Set SCTRL.M (MPU on) -#endif -#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 - orr r0, r0, #CR_A -#else - bic r0, r0, #CR_A -#endif -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CR_C -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #CR_Z -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #CR_I -#endif - mcr p15, 0, r0, c1, c0, 0 @ write control reg - instr_sync -#elif defined (CONFIG_CPU_V7M) -#ifdef CONFIG_ARM_MPU - ldreq r3, [r12, MPU_CTRL] - biceq r3, #MPU_CTRL_PRIVDEFENA - orreq r3, #MPU_CTRL_ENABLE - streq r3, [r12, MPU_CTRL] - isb -#endif - /* For V7M systems we want to modify the CCR similarly to the SCTLR */ -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #V7M_SCB_CCR_DC -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #V7M_SCB_CCR_BP -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #V7M_SCB_CCR_IC -#endif - str r0, [r12, V7M_SCB_CCR] - /* Pass exc_ret to __mmap_switched */ - mov r0, r10 -#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ - ret lr -ENDPROC(__after_proc_init) - .ltorg - -#ifdef CONFIG_ARM_MPU - - -#ifndef CONFIG_CPU_V7M -/* Set which MPU region should be programmed */ -.macro set_region_nr tmp, rgnr, unused - mov \tmp, \rgnr @ Use static region numbers - mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR -.endm - -/* Setup a single MPU region, either D or I side (D-side for unified) */ -.macro setup_region bar, acr, sr, side = PMSAv7_DATA_SIDE, unused - mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR - mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR - mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR -.endm -#else -.macro set_region_nr tmp, rgnr, base - mov \tmp, \rgnr - str \tmp, [\base, #PMSAv7_RNR] -.endm - -.macro setup_region bar, acr, sr, unused, base - lsl \acr, \acr, #16 - orr \acr, \acr, \sr - str \bar, [\base, #PMSAv7_RBAR] - str \acr, [\base, #PMSAv7_RASR] -.endm - -#endif -/* - * Setup the MPU and initial MPU Regions. 
We create the following regions: - * Region 0: Use this for probing the MPU details, so leave disabled. - * Region 1: Background region - covers the whole of RAM as strongly ordered - * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 - * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page - * - * r6: Value to be written to DRSR (and IRSR if required) for PMSAv7_RAM_REGION -*/ - __HEAD - -ENTRY(__setup_mpu) - - /* Probe for v7 PMSA compliance */ -M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB) -M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB) - -AR_CLASS(mrc p15, 0, r0, c0, c1, 4) @ Read ID_MMFR0 -M_CLASS(ldr r0, [r12, 0x50]) - and r0, r0, #(MMFR0_PMSA) @ PMSA field - teq r0, #(MMFR0_PMSAv7) @ PMSA v7 - beq __setup_pmsa_v7 - teq r0, #(MMFR0_PMSAv8) @ PMSA v8 - beq __setup_pmsa_v8 - - ret lr -ENDPROC(__setup_mpu) - -ENTRY(__setup_pmsa_v7) - /* Calculate the size of a region covering just the kernel */ - ldr r5, =PLAT_PHYS_OFFSET @ Region start: PHYS_OFFSET - ldr r6, =(_end) @ Cover whole kernel - sub r6, r6, r5 @ Minimum size of region to map - clz r6, r6 @ Region size must be 2^N... - rsb r6, r6, #31 @ ...so round up region size - lsl r6, r6, #PMSAv7_RSR_SZ @ Put size in right field - orr r6, r6, #(1 << PMSAv7_RSR_EN) @ Set region enabled bit - - /* Determine whether the D/I-side memory map is unified. We set the - * flags here and continue to use them for the rest of this function */ -AR_CLASS(mrc p15, 0, r0, c0, c0, 4) @ MPUIR -M_CLASS(ldr r0, [r12, #MPU_TYPE]) - ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU - bxeq lr - tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified - - /* Setup second region first to free up r6 */ - set_region_nr r0, #PMSAv7_RAM_REGION, r12 - isb - /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ - ldr r0, =PLAT_PHYS_OFFSET @ RAM starts at PHYS_OFFSET - ldr r5,=(PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL) - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ PHYS_OFFSET, shared, enabled - beq 1f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12 @ PHYS_OFFSET, shared, enabled -1: isb - - /* First/background region */ - set_region_nr r0, #PMSAv7_BG_REGION, r12 - isb - /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ - mov r0, #0 @ BG region starts at 0x0 - ldr r5,=(PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0NA) - mov r6, #PMSAv7_RSR_ALL_MEM @ 4GB region, enabled - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ 0x0, BG region, enabled - beq 2f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE r12 @ 0x0, BG region, enabled -2: isb - -#ifdef CONFIG_XIP_KERNEL - set_region_nr r0, #PMSAv7_ROM_REGION, r12 - isb - - ldr r5,=(PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL) - - ldr r0, =CONFIG_XIP_PHYS_ADDR @ ROM start - ldr r6, =(_exiprom) @ ROM end - sub r6, r6, r0 @ Minimum size of region to map - clz r6, r6 @ Region size must be 2^N... 
- rsb r6, r6, #31 @ ...so round up region size - lsl r6, r6, #PMSAv7_RSR_SZ @ Put size in right field - orr r6, r6, #(1 << PMSAv7_RSR_EN) @ Set region enabled bit - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled - beq 3f @ Memory-map not unified - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12 @ XIP_PHYS_ADDR, shared, enabled -3: isb -#endif - ret lr -ENDPROC(__setup_pmsa_v7) - -ENTRY(__setup_pmsa_v8) - mov r0, #0 -AR_CLASS(mcr p15, 0, r0, c6, c2, 1) @ PRSEL -M_CLASS(str r0, [r12, #PMSAv8_RNR]) - isb - -#ifdef CONFIG_XIP_KERNEL - ldr r5, =CONFIG_XIP_PHYS_ADDR @ ROM start - ldr r6, =(_exiprom) @ ROM end - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c8, 0) @ PRBAR0 -AR_CLASS(mcr p15, 0, r6, c6, c8, 1) @ PRLAR0 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(0)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(0)]) -#endif - - ldr r5, =KERNEL_START - ldr r6, =KERNEL_END - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c8, 4) @ PRBAR1 -AR_CLASS(mcr p15, 0, r6, c6, c8, 5) @ PRLAR1 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(1)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(1)]) - - /* Setup Background: 0x0 - min(KERNEL_START, XIP_PHYS_ADDR) */ -#ifdef CONFIG_XIP_KERNEL - ldr r6, =KERNEL_START - ldr r5, =CONFIG_XIP_PHYS_ADDR - cmp r6, r5 - movcs r6, r5 -#else - ldr r6, =KERNEL_START -#endif - cmp r6, #0 - beq 1f - - mov r5, #0 - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c9, 0) @ PRBAR2 -AR_CLASS(mcr p15, 0, r6, c6, c9, 1) @ PRLAR2 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(2)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(2)]) - -1: - /* Setup Background: max(KERNEL_END, _exiprom) - 0xffffffff */ -#ifdef CONFIG_XIP_KERNEL - ldr r5, =KERNEL_END - ldr r6, =(_exiprom) - cmp r5, r6 - movcc r5, r6 -#else - ldr r5, =KERNEL_END -#endif - mov r6, #0xffffffff - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -AR_CLASS(mcr p15, 0, r5, c6, c9, 4) @ PRBAR3 -AR_CLASS(mcr p15, 0, r6, c6, c9, 5) @ PRLAR3 -M_CLASS(str r5, [r12, #PMSAv8_RBAR_A(3)]) -M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(3)]) - -#ifdef CONFIG_XIP_KERNEL - /* Setup Background: min(_exiprom, KERNEL_END) - max(KERNEL_START, XIP_PHYS_ADDR) */ - ldr r5, =(_exiprom) - ldr r6, =KERNEL_END - cmp r5, r6 - movcs r5, r6 - - ldr r6, =KERNEL_START - ldr r0, =CONFIG_XIP_PHYS_ADDR - cmp r6, r0 - movcc r6, r0 - - sub r6, r6, #1 - bic r6, r6, #(PMSAv8_MINALIGN - 1) - - orr r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN) - orr r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN) - -#ifdef CONFIG_CPU_V7M - /* There is no alias for n == 4 */ - mov r0, #4 - str r0, [r12, #PMSAv8_RNR] @ PRSEL - isb - - str r5, [r12, #PMSAv8_RBAR_A(0)] - str r6, [r12, #PMSAv8_RLAR_A(0)] -#else - mcr p15, 0, r5, c6, c10, 0 @ PRBAR4 - mcr p15, 0, r6, c6, c10, 1 @ PRLAR4 -#endif -#endif - ret lr -ENDPROC(__setup_pmsa_v8) - -#ifdef CONFIG_SMP -/* - * r6: pointer at mpu_rgn_info - */ - - .text 
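A hedged model of the clz/rsb/lsl/orr sizing sequence used above: the PMSAv7 DRSR size field (bits [5:1]) encodes a region of 2^(SZ+1) bytes, so taking SZ = 31 - clz(span) always yields a power-of-two region at least as large as the span. pmsav7_drsr() is illustrative only; span must be non-zero.

static unsigned int pmsav7_drsr(unsigned int span_bytes)
{
	unsigned int sz = 31u - (unsigned int)__builtin_clz(span_bytes);

	return (sz << 1) | 1u;		/* size field plus enable bit */
}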
-ENTRY(__secondary_setup_mpu) - /* Use MPU region info supplied by __cpu_up */ - ldr r6, [r7] @ get secondary_data.mpu_rgn_info - - /* Probe for v7 PMSA compliance */ - mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 - and r0, r0, #(MMFR0_PMSA) @ PMSA field - teq r0, #(MMFR0_PMSAv7) @ PMSA v7 - beq __secondary_setup_pmsa_v7 - teq r0, #(MMFR0_PMSAv8) @ PMSA v8 - beq __secondary_setup_pmsa_v8 - b __error_p -ENDPROC(__secondary_setup_mpu) - -/* - * r6: pointer at mpu_rgn_info - */ -ENTRY(__secondary_setup_pmsa_v7) - /* Determine whether the D/I-side memory map is unified. We set the - * flags here and continue to use them for the rest of this function */ - mrc p15, 0, r0, c0, c0, 4 @ MPUIR - ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU - beq __error_p - - ldr r4, [r6, #MPU_RNG_INFO_USED] - mov r5, #MPU_RNG_SIZE - add r3, r6, #MPU_RNG_INFO_RNGS - mla r3, r4, r5, r3 - -1: - tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified - sub r3, r3, #MPU_RNG_SIZE - sub r4, r4, #1 - - set_region_nr r0, r4 - isb - - ldr r0, [r3, #MPU_RGN_DRBAR] - ldr r6, [r3, #MPU_RGN_DRSR] - ldr r5, [r3, #MPU_RGN_DRACR] - - setup_region r0, r5, r6, PMSAv7_DATA_SIDE - beq 2f - setup_region r0, r5, r6, PMSAv7_INSTR_SIDE -2: isb - - mrc p15, 0, r0, c0, c0, 4 @ Reevaluate the MPUIR - cmp r4, #0 - bgt 1b - - ret lr -ENDPROC(__secondary_setup_pmsa_v7) - -ENTRY(__secondary_setup_pmsa_v8) - ldr r4, [r6, #MPU_RNG_INFO_USED] -#ifndef CONFIG_XIP_KERNEL - add r4, r4, #1 -#endif - mov r5, #MPU_RNG_SIZE - add r3, r6, #MPU_RNG_INFO_RNGS - mla r3, r4, r5, r3 - -1: - sub r3, r3, #MPU_RNG_SIZE - sub r4, r4, #1 - - mcr p15, 0, r4, c6, c2, 1 @ PRSEL - isb - - ldr r5, [r3, #MPU_RGN_PRBAR] - ldr r6, [r3, #MPU_RGN_PRLAR] - - mcr p15, 0, r5, c6, c3, 0 @ PRBAR - mcr p15, 0, r6, c6, c3, 1 @ PRLAR - - cmp r4, #0 - bgt 1b - - ret lr -ENDPROC(__secondary_setup_pmsa_v8) -#endif /* CONFIG_SMP */ -#endif /* CONFIG_ARM_MPU */ -#include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S deleted file mode 100644 index f1cdc1f369575c368a3ea012375c19b52cc47bb9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/head.S +++ /dev/null @@ -1,727 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/head.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - * - * Kernel startup code for all 32-bit CPUs - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING) -#include CONFIG_DEBUG_LL_INCLUDE -#endif - -/* - * swapper_pg_dir is the virtual address of the initial page table. - * We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must - * make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect - * the least significant 16 bits to be 0x8000, but we could probably - * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000. - */ -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000 -#error KERNEL_RAM_VADDR must start at 0xXXXX8000 -#endif - -#ifdef CONFIG_ARM_LPAE - /* LPAE requires an additional page for the PGD */ -#define PG_DIR_SIZE 0x5000 -#define PMD_ORDER 3 -#else -#define PG_DIR_SIZE 0x4000 -#define PMD_ORDER 2 -#endif - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE - - .macro pgtbl, rd, phys - add \rd, \phys, #TEXT_OFFSET - sub \rd, \rd, #PG_DIR_SIZE - .endm - -/* - * Kernel startup entry point. 
- * --------------------------- - * - * This is normally called from the decompressor code. The requirements - * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr, r2 = atags or dtb pointer. - * - * This code is mostly position independent, so if you link the kernel at - * 0xc0008000, you call this at __pa(0xc0008000). - * - * See linux/arch/arm/tools/mach-types for the complete list of machine - * numbers for r1. - * - * We're trying to keep crap to a minimum; DO NOT add any machine specific - * crap here - that's what the boot loader (or in extreme, well justified - * circumstances, zImage) is for. - */ - .arm - - __HEAD -ENTRY(stext) - ARM_BE8(setend be ) @ ensure we are in BE8 mode - - THUMB( badr r9, 1f ) @ Kernel is always entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install -#endif - @ ensure svc mode and all interrupts masked - safe_svcmode_maskall r9 - - mrc p15, 0, r9, c0, c0 @ get processor id - bl __lookup_processor_type @ r5=procinfo r9=cpuid - movs r10, r5 @ invalid processor (r5=0)? - THUMB( it eq ) @ force fixup-able long branch encoding - beq __error_p @ yes, error 'p' - -#ifdef CONFIG_ARM_LPAE - mrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0 - and r3, r3, #0xf @ extract VMSA support - cmp r3, #5 @ long-descriptor translation table format? - THUMB( it lo ) @ force fixup-able long branch encoding - blo __error_lpae @ only classic page table format -#endif - -#ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET -#else - ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case -#endif - - /* - * r1 = machine no, r2 = atags or dtb, - * r8 = phys_offset, r9 = cpuid, r10 = procinfo - */ - bl __vet_atags -#ifdef CONFIG_SMP_ON_UP - bl __fixup_smp -#endif -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - bl __fixup_pv_table -#endif - bl __create_page_tables - - /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm/mm/proc-*.S for details. r10 = base of - * xxx_proc_info structure selected by __lookup_processor_type - * above. - * - * The processor init function will be called with: - * r1 - machine type - * r2 - boot data (atags/dt) pointer - * r4 - translation table base (low word) - * r5 - translation table base (high word, if LPAE) - * r8 - translation table base 1 (pfn if LPAE) - * r9 - cpuid - * r13 - virtual address for __enable_mmu -> __turn_mmu_on - * - * On return, the CPU will be ready for the MMU to be turned on, - * r0 will hold the CPU control register value, r1, r2, r4, and - * r9 will be preserved. r5 will also be preserved if LPAE. - */ - ldr r13, =__mmap_switched @ address to jump to after - @ mmu has been enabled - badr lr, 1f @ return (PIC) address -#ifdef CONFIG_ARM_LPAE - mov r5, #0 @ high TTBR0 - mov r8, r4, lsr #12 @ TTBR1 is swapper_pg_dir pfn -#else - mov r8, r4 @ set TTBR1 to swapper_pg_dir -#endif - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 - ret r12 -1: b __enable_mmu -ENDPROC(stext) - .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif - -/* - * Setup the initial page tables. We only setup the barest - * amount which are required to get the kernel running, which - * generally means mapping in the kernel code. 
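A hedged arithmetic check of the page-table placement described at the top of this file, using the classic 3G/1G split as assumed example values (the EX_* names are illustrative, not kernel symbols):

#define EX_PAGE_OFFSET	0xc0000000UL
#define EX_TEXT_OFFSET	0x00008000UL
#define EX_PG_DIR_SIZE	0x4000UL	/* non-LPAE: 16 KiB */

#define EX_KERNEL_RAM_VADDR (EX_PAGE_OFFSET + EX_TEXT_OFFSET)	 /* 0xc0008000 */
#define EX_SWAPPER_PG_DIR   (EX_KERNEL_RAM_VADDR - EX_PG_DIR_SIZE) /* 0xc0004000 */

_Static_assert((EX_KERNEL_RAM_VADDR & 0xffffUL) == 0x8000UL,
	       "KERNEL_RAM_VADDR must end in 0x8000");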
- * - * r8 = phys_offset, r9 = cpuid, r10 = procinfo - * - * Returns: - * r0, r3, r5-r7 corrupted - * r4 = physical page table address - */ -__create_page_tables: - pgtbl r4, r8 @ page table address - - /* - * Clear the swapper page table - */ - mov r0, r4 - mov r3, #0 - add r6, r0, #PG_DIR_SIZE -1: str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - str r3, [r0], #4 - teq r0, r6 - bne 1b - -#ifdef CONFIG_ARM_LPAE - /* - * Build the PGD table (first level) to point to the PMD table. A PGD - * entry is 64-bit wide. - */ - mov r0, r4 - add r3, r4, #0x1000 @ first PMD table address - orr r3, r3, #3 @ PGD block type - mov r6, #4 @ PTRS_PER_PGD - mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER -1: -#ifdef CONFIG_CPU_ENDIAN_BE8 - str r7, [r0], #4 @ set top PGD entry bits - str r3, [r0], #4 @ set bottom PGD entry bits -#else - str r3, [r0], #4 @ set bottom PGD entry bits - str r7, [r0], #4 @ set top PGD entry bits -#endif - add r3, r3, #0x1000 @ next PMD table - subs r6, r6, #1 - bne 1b - - add r4, r4, #0x1000 @ point to the PMD tables -#ifdef CONFIG_CPU_ENDIAN_BE8 - add r4, r4, #4 @ we only write the bottom word -#endif -#endif - - ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags - - /* - * Create identity mapping to cater for __enable_mmu. - * This identity mapping will be removed by paging_init(). - */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end - mov r5, r5, lsr #SECTION_SHIFT - mov r6, r6, lsr #SECTION_SHIFT - -1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base - str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping - cmp r5, r6 - addlo r5, r5, #1 @ next section - blo 1b - - /* - * Map our RAM from the start to the end of the kernel .bss section. - */ - add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) - ldr r6, =(_end - 1) - orr r3, r8, r7 - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: str r3, [r0], #1 << PMD_ORDER - add r3, r3, #1 << SECTION_SHIFT - cmp r0, r6 - bls 1b - -#ifdef CONFIG_XIP_KERNEL - /* - * Map the kernel image separately as it is not located in RAM. - */ -#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) - mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! - ldr r6, =(_edata_loc - 1) - add r0, r0, #1 << PMD_ORDER - add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: cmp r0, r6 - add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER - bls 1b -#endif - - /* - * Then map boot params address in r2 if specified. - * We map 2 sections in case the ATAGs/DTB crosses a section boundary. - */ - mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orrne r6, r7, r0 - strne r6, [r3], #1 << PMD_ORDER - addne r6, r6, #1 << SECTION_SHIFT - strne r6, [r3] - -#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) - sub r4, r4, #4 @ Fixup page table pointer - @ for 64-bit descriptors -#endif - -#ifdef CONFIG_DEBUG_LL -#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) - /* - * Map in IO space for serial debugging. - * This allows debug messages to be output - * via a serial console before paging_init. 
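The identity-mapping and RAM-mapping loops above each write one first-level entry per 1 MiB section (classic, non-LPAE format, where PMD_ORDER is 2 and an entry is a single 32-bit word). An illustrative C equivalent, not the kernel's actual code:

#include <stdint.h>

#define SECTION_SHIFT   20                      /* 1 MiB sections */
#define SECTION_SIZE    (1u << SECTION_SHIFT)

/* Write a section mapping for [virt, virt+size) -> phys with the given
 * mm_mmuflags, one 32-bit first-level entry per 1 MiB. */
static void map_section_range(uint32_t *pgd, uint32_t virt, uint32_t phys,
                              uint32_t size, uint32_t mmuflags)
{
        uint32_t end = virt + size;

        for (; virt < end; virt += SECTION_SIZE, phys += SECTION_SIZE)
                pgd[virt >> SECTION_SHIFT] =
                        (phys & ~(SECTION_SIZE - 1)) | mmuflags;
}

/* e.g. the identity map over __turn_mmu_on: virtual == physical */
/* map_section_range(pgd, mmu_on_phys, mmu_on_phys, SECTION_SIZE, flags); */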
- */ - addruart r7, r3, r0 - - mov r3, r3, lsr #SECTION_SHIFT - mov r3, r3, lsl #PMD_ORDER - - add r0, r4, r3 - mov r3, r7, lsr #SECTION_SHIFT - ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags - orr r3, r7, r3, lsl #SECTION_SHIFT -#ifdef CONFIG_ARM_LPAE - mov r7, #1 << (54 - 32) @ XN -#ifdef CONFIG_CPU_ENDIAN_BE8 - str r7, [r0], #4 - str r3, [r0], #4 -#else - str r3, [r0], #4 - str r7, [r0], #4 -#endif -#else - orr r3, r3, #PMD_SECT_XN - str r3, [r0], #4 -#endif - -#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */ - /* we don't need any serial debugging mappings */ - ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags -#endif - -#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS) - /* - * If we're using the NetWinder or CATS, we also need to map - * in the 16550-type serial port for the debug messages - */ - add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER) - orr r3, r7, #0x7c000000 - str r3, [r0] -#endif -#ifdef CONFIG_ARCH_RPC - /* - * Map in screen at 0x02000000 & SCREEN2_BASE - * Similar reasons here - for debug. This is - * only for Acorn RiscPC architectures. - */ - add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER) - orr r3, r7, #0x02000000 - str r3, [r0] - add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0] -#endif -#endif -#ifdef CONFIG_ARM_LPAE - sub r4, r4, #0x1000 @ point to the PGD table -#endif - ret lr -ENDPROC(__create_page_tables) - .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end - -#if defined(CONFIG_SMP) - .text - .arm -ENTRY(secondary_startup_arm) - THUMB( badr r9, 1f ) @ Kernel is entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -ENTRY(secondary_startup) - /* - * Common entry point for secondary CPUs. - * - * Ensure that we're in SVC mode, and IRQs are disabled. Lookup - * the processor type - there is no need to check the machine type - * as it has already been validated by the primary processor. - */ - - ARM_BE8(setend be) @ ensure we are in BE8 mode - -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r9 - - mrc p15, 0, r9, c0, c0 @ get processor id - bl __lookup_processor_type - movs r10, r5 @ invalid processor? - moveq r0, #'p' @ yes, error 'p' - THUMB( it eq ) @ force fixup-able long branch encoding - beq __error_p - - /* - * Use the page tables supplied from __cpu_up. - */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr - ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir -ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: -ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps -ARM_BE8(eor r4, r4, r5) @ without using a temp reg. - ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir - badr lr, __enable_mmu @ return address - mov r13, r12 @ __secondary_switched address - ldr r12, [r10, #PROCINFO_INITFUNC] - add r12, r12, r10 @ initialise processor - @ (return control reg) - ret r12 -ENDPROC(secondary_startup) -ENDPROC(secondary_startup_arm) - - /* - * r6 = &secondary_data - */ -ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack - mov fp, #0 - b secondary_start_kernel -ENDPROC(__secondary_switched) - - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched -#endif /* defined(CONFIG_SMP) */ - - - -/* - * Setup common bits before finally enabling the MMU. 
Essentially - * this is just loading the page table pointer and domain access - * registers. All these registers need to be preserved by the - * processor setup function (or set in the case of r0) - * - * r0 = cp#15 control register - * r1 = machine ID - * r2 = atags or dtb pointer - * r4 = TTBR pointer (low word) - * r5 = TTBR pointer (high word if LPAE) - * r9 = processor ID - * r13 = *virtual* address to jump to upon completion - */ -__enable_mmu: -#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 - orr r0, r0, #CR_A -#else - bic r0, r0, #CR_A -#endif -#ifdef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CR_C -#endif -#ifdef CONFIG_CPU_BPREDICT_DISABLE - bic r0, r0, #CR_Z -#endif -#ifdef CONFIG_CPU_ICACHE_DISABLE - bic r0, r0, #CR_I -#endif -#ifdef CONFIG_ARM_LPAE - mcrr p15, 0, r4, r5, c2 @ load TTBR0 -#else - mov r5, #DACR_INIT - mcr p15, 0, r5, c3, c0, 0 @ load domain access register - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer -#endif - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible - * memory space. You will not be able to trace execution through this. - * If you have an enquiry about this, *please* check the linux-arm-kernel - * mailing list archives BEFORE sending another post to the list. - * - * r0 = cp#15 control register - * r1 = machine ID - * r2 = atags or dtb pointer - * r9 = processor ID - * r13 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(__turn_mmu_on) - mov r0, r0 - instr_sync - mcr p15, 0, r0, c1, c0, 0 @ write control reg - mrc p15, 0, r3, c0, c0, 0 @ read id reg - instr_sync - mov r3, r3 - mov r3, r13 - ret r3 -__turn_mmu_on_end: -ENDPROC(__turn_mmu_on) - .popsection - - -#ifdef CONFIG_SMP_ON_UP - __HEAD -__fixup_smp: - and r3, r9, #0x000f0000 @ architecture version - teq r3, #0x000f0000 @ CPU ID supported? - bne __fixup_smp_on_up @ no, assume UP - - bic r3, r9, #0x00ff0000 - bic r3, r3, #0x0000000f @ mask 0xff00fff0 - mov r4, #0x41000000 - orr r4, r4, #0x0000b000 - orr r4, r4, #0x00000020 @ val 0x4100b020 - teq r3, r4 @ ARM 11MPCore? - reteq lr @ yes, assume SMP - - mrc p15, 0, r0, c0, c0, 5 @ read MPIDR - and r0, r0, #0xc0000000 @ multiprocessing extensions and - teq r0, #0x80000000 @ not part of a uniprocessor system? - bne __fixup_smp_on_up @ no, assume UP - - @ Core indicates it is SMP. Check for Aegis SOC where a single - @ Cortex-A9 CPU is present but SMP operations fault. - mov r4, #0x41000000 - orr r4, r4, #0x0000c000 - orr r4, r4, #0x00000090 - teq r3, r4 @ Check for ARM Cortex-A9 - retne lr @ Not ARM Cortex-A9, - - @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the - @ below address check will need to be #ifdef'd or equivalent - @ for the Aegis platform. - mrc p15, 4, r0, c15, c0 @ get SCU base address - teq r0, #0x0 @ '0' on actual UP A9 hardware - beq __fixup_smp_on_up @ So its an A9 UP - ldr r0, [r0, #4] @ read SCU Config -ARM_BE8(rev r0, r0) @ byteswap if big endian - and r0, r0, #0x3 @ number of CPUs - teq r0, #0x0 @ is 1? - retne lr - -__fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 - b __do_fixup_smp_on_up -ENDPROC(__fixup_smp) - - .align -1: .word . 
- .word __smpalt_begin - .word __smpalt_end - - .pushsection .data - .align 2 - .globl smp_on_up -smp_on_up: - ALT_SMP(.long 1) - ALT_UP(.long 0) - .popsection -#endif - - .text -__do_fixup_smp_on_up: - cmp r4, r5 - reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) -#ifdef __ARMEB__ - THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. -#endif - THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. - THUMB( strh r6, [r0] ) - b __do_fixup_smp_on_up -ENDPROC(__do_fixup_smp_on_up) - -ENTRY(fixup_smp) - stmfd sp!, {r4 - r6, lr} - mov r4, r0 - add r5, r0, r1 - mov r3, #0 - bl __do_fixup_smp_on_up - ldmfd sp!, {r4 - r6, pc} -ENDPROC(fixup_smp) - -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . 
- .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - -#include "head-common.S" diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S deleted file mode 100644 index 6607fa817bba9a5510ac0a0c2de88342a80dfdb9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/hyp-stub.S +++ /dev/null @@ -1,271 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2012 Linaro Limited. - */ - -#include -#include -#include -#include -#include - -#ifndef ZIMAGE -/* - * For the kernel proper, we need to find out the CPU boot mode long after - * boot, so we need to store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -.data - .align 2 -ENTRY(__boot_cpu_mode) - .long 0 -.text - - /* - * Save the primary CPU boot mode. Requires 3 scratch registers. - */ - .macro store_primary_cpu_mode reg1, reg2, reg3 - mrs \reg1, cpsr - and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] - .endm - - /* - * Compare the current mode with the one saved on the primary CPU. - * If they don't match, record that fact. The Z bit indicates - * if there's a match or not. 
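__fixup_a_pv_table above rewrites the immediate field of every `add/sub rd, rn, #imm` stub listed in the pv table. Because __pv_offset is verified to be 16 MiB aligned, its significant bits always fit the 8-bit immediate rotated into bits 31..24. A hedged sketch of the ARM (non-Thumb) patch step, not the kernel's exact code:

#include <stdint.h>

/* Patch one ARM-encoded 'add/sub rd, rn, #imm' stub so its rotated
 * immediate encodes pv_offset bits 31..24; the 16 MiB alignment check
 * above guarantees the low 24 bits of pv_offset are zero. */
static uint32_t patch_pv_stub(uint32_t insn, uint32_t pv_offset)
{
        insn &= ~0x000000ffu;           /* clear the imm8 field */
        insn |= pv_offset >> 24;        /* rotation field already selects <<24 */
        return insn;
}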
- * Requires 3 additional scratch registers.
- */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-	adr	\reg2, .L__boot_cpu_mode_offset
-	ldr	\reg3, [\reg2]
-	ldr	\reg1, [\reg2, \reg3]
-	cmp	\mode, \reg1		@ matches primary CPU boot mode?
-	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
-	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
-	.endm
-
-#else	/* ZIMAGE */
-
-	.macro	store_primary_cpu_mode reg1:req, reg2:req, reg3:req
-	.endm
-
-/*
- * The zImage loader only runs on one CPU, so we don't bother with multi-CPU
- * consistency checking:
- */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-	cmp	\mode, \mode
-	.endm
-
-#endif /* ZIMAGE */
-
-/*
- * Hypervisor stub installation functions.
- *
- * These must be called with the MMU and D-cache off.
- * They are not ABI compliant and are only intended to be called from the kernel
- * entry points in head.S.
- */
-@ Call this from the primary CPU
-ENTRY(__hyp_stub_install)
-	store_primary_cpu_mode	r4, r5, r6
-ENDPROC(__hyp_stub_install)
-
-	@ fall through...
-
-@ Secondary CPUs should call here
-ENTRY(__hyp_stub_install_secondary)
-	mrs	r4, cpsr
-	and	r4, r4, #MODE_MASK
-
-	/*
-	 * If the secondary has booted with a different mode, give up
-	 * immediately.
-	 */
-	compare_cpu_mode_with_primary	r4, r5, r6, r7
-	retne	lr
-
-	/*
-	 * Once we have given up on one CPU, we do not try to install the
-	 * stub hypervisor on the remaining ones: because the saved boot mode
-	 * is modified, it can't compare equal to the CPSR mode field any
-	 * more.
-	 *
-	 * Otherwise...
-	 */
-
-	cmp	r4, #HYP_MODE
-	retne	lr			@ give up if the CPU is not in HYP mode
-
-/*
- * Configure HSCTLR to set correct exception endianness/instruction set
- * state etc.
- * Turn off all traps
- * Eventually, CPU-specific code might be needed -- assume not for now
- *
- * This code relies on the "eret" instruction to synchronize the
- * various coprocessor accesses. This is done when we switch to SVC
- * (see safe_svcmode_maskall).
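Once every CPU has run this install path, generic kernel code can decide whether HYP mode is actually usable. A sketch of that predicate; the MODE_MASK and HYP_MODE values follow the ARM CPSR encoding, while the BOOT_CPU_MODE_MISMATCH value shown is an assumption standing in for the header definition:

#include <stdbool.h>

#define MODE_MASK               0x1f            /* CPSR mode field */
#define HYP_MODE                0x1a            /* ARMv7 HYP mode encoding */
#define BOOT_CPU_MODE_MISMATCH  0x80000000u     /* assumed flag bit */

extern unsigned int __boot_cpu_mode;    /* written by store_primary_cpu_mode */

/* True only if the boot CPU entered in HYP mode and no secondary CPU
 * recorded a mode mismatch via compare_cpu_mode_with_primary. */
static bool hyp_mode_usable(void)
{
        return (__boot_cpu_mode & MODE_MASK) == HYP_MODE &&
               !(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH);
}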
- */ - @ Now install the hypervisor stub: - W(adr) r7, __hyp_stub_vectors - mcr p15, 4, r7, c12, c0, 0 @ set hypervisor vector base (HVBAR) - - @ Disable all traps, so we don't get any nasty surprise - mov r7, #0 - mcr p15, 4, r7, c1, c1, 0 @ HCR - mcr p15, 4, r7, c1, c1, 2 @ HCPTR - mcr p15, 4, r7, c1, c1, 3 @ HSTR - -THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE -ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE - mcr p15, 4, r7, c1, c0, 0 @ HSCTLR - - mrc p15, 4, r7, c1, c1, 1 @ HDCR - and r7, #0x1f @ Preserve HPMN - mcr p15, 4, r7, c1, c1, 1 @ HDCR - - @ Make sure NS-SVC is initialised appropriately - mrc p15, 0, r7, c1, c0, 0 @ SCTLR - orr r7, #(1 << 5) @ CP15 barriers enabled - bic r7, #(3 << 7) @ Clear SED/ITD for v8 (RES0 for v7) - bic r7, #(3 << 19) @ WXN and UWXN disabled - mcr p15, 0, r7, c1, c0, 0 @ SCTLR - - mrc p15, 0, r7, c0, c0, 0 @ MIDR - mcr p15, 4, r7, c0, c0, 0 @ VPIDR - - mrc p15, 0, r7, c0, c0, 5 @ MPIDR - mcr p15, 4, r7, c0, c0, 5 @ VMPIDR - -#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) - @ make CNTP_* and CNTPCT accessible from PL1 - mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - ubfx r7, r7, #16, #4 - teq r7, #0 - beq 1f - mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL - orr r7, r7, #3 @ PL1PCEN | PL1PCTEN - mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL - mov r7, #0 - mcrr p15, 4, r7, r7, c14 @ CNTVOFF - - @ Disable virtual timer in case it was counting - mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL - bic r7, #1 @ Clear ENABLE - mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL -1: -#endif - -#ifdef CONFIG_ARM_GIC_V3 - @ Check whether GICv3 system registers are available - mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - ubfx r7, r7, #28, #4 - teq r7, #0 - beq 2f - - @ Enable system register accesses - mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE - orr r7, r7, #(ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE) - mcr p15, 4, r7, c12, c9, 5 @ ICC_HSRE - isb - - @ SRE bit could be forced to 0 by firmware. - @ Check whether it sticks before accessing any other sysreg - mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE - tst r7, #ICC_SRE_EL2_SRE - beq 2f - mov r7, #0 - mcr p15, 4, r7, c12, c11, 0 @ ICH_HCR -2: -#endif - - bx lr @ The boot CPU mode is left in r4. -ENDPROC(__hyp_stub_install_secondary) - -__hyp_stub_do_trap: - teq r0, #HVC_SET_VECTORS - bne 1f - mcr p15, 4, r1, c12, c0, 0 @ set HVBAR - b __hyp_stub_exit - -1: teq r0, #HVC_SOFT_RESTART - bne 1f - bx r1 - -1: teq r0, #HVC_RESET_VECTORS - beq __hyp_stub_exit - - ldr r0, =HVC_STUB_ERR - __ERET - -__hyp_stub_exit: - mov r0, #0 - __ERET -ENDPROC(__hyp_stub_do_trap) - -/* - * __hyp_set_vectors: Call this after boot to set the initial hypervisor - * vectors as part of hypervisor installation. On an SMP system, this should - * be called on each CPU. - * - * r0 must be the physical address of the new vector table (which must lie in - * the bottom 4GB of physical address space. - * - * r0 must be 32-byte aligned. - * - * Before calling this, you must check that the stub hypervisor is installed - * everywhere, by waiting for any secondary CPUs to be brought up and then - * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true. - * - * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or - * something else went wrong... in such cases, trying to install a new - * hypervisor is unlikely to work as desired. - * - * When you call into your shiny new hypervisor, sp_hyp will contain junk, - * so you will need to set that to something sensible at the new hypervisor's - * initialisation entry point. 
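A hypothetical C caller honouring the contract spelled out above; the helper name and error convention are made up for illustration:

extern void __hyp_set_vectors(unsigned long phys_vector_base);

/* Install new HYP vectors once the stub is known to be present everywhere.
 * The vector base must be a physical address below 4 GB and 32-byte
 * aligned, per the comment above. */
static int install_my_hypervisor(unsigned long vectors_phys)
{
        if (vectors_phys & 31)          /* must be 32-byte aligned */
                return -1;
        __hyp_set_vectors(vectors_phys);
        return 0;                       /* sp_hyp still needs initialising */
}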
- */ -ENTRY(__hyp_set_vectors) - mov r1, r0 - mov r0, #HVC_SET_VECTORS - __HVC(0) - ret lr -ENDPROC(__hyp_set_vectors) - -ENTRY(__hyp_soft_restart) - mov r1, r0 - mov r0, #HVC_SOFT_RESTART - __HVC(0) - ret lr -ENDPROC(__hyp_soft_restart) - -ENTRY(__hyp_reset_vectors) - mov r0, #HVC_RESET_VECTORS - __HVC(0) - ret lr -ENDPROC(__hyp_reset_vectors) - -#ifndef ZIMAGE -.align 2 -.L__boot_cpu_mode_offset: - .long __boot_cpu_mode - . -#endif - -.align 5 -ENTRY(__hyp_stub_vectors) -__hyp_stub_reset: W(b) . -__hyp_stub_und: W(b) . -__hyp_stub_svc: W(b) . -__hyp_stub_pabort: W(b) . -__hyp_stub_dabort: W(b) . -__hyp_stub_trap: W(b) __hyp_stub_do_trap -__hyp_stub_irq: W(b) . -__hyp_stub_fiq: W(b) . -ENDPROC(__hyp_stub_vectors) - diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S deleted file mode 100644 index 0dcae787b004d61cfe92bf4548b8dface3e8d01e..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/iwmmxt.S +++ /dev/null @@ -1,370 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/kernel/iwmmxt.S - * - * XScale iWMMXt (Concan) context switching and handling - * - * Initial code: - * Copyright (c) 2003, Intel Corporation - * - * Full lazy switching support, optimizations and more, by Nicolas Pitre -* Copyright (c) 2003-2004, MontaVista Software, Inc. - */ - -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) -#define PJ4(code...) code -#define XSC(code...) -#elif defined(CONFIG_CPU_MOHAWK) || \ - defined(CONFIG_CPU_XSC3) || \ - defined(CONFIG_CPU_XSCALE) -#define PJ4(code...) -#define XSC(code...) code -#else -#error "Unsupported iWMMXt architecture" -#endif - -#define MMX_WR0 (0x00) -#define MMX_WR1 (0x08) -#define MMX_WR2 (0x10) -#define MMX_WR3 (0x18) -#define MMX_WR4 (0x20) -#define MMX_WR5 (0x28) -#define MMX_WR6 (0x30) -#define MMX_WR7 (0x38) -#define MMX_WR8 (0x40) -#define MMX_WR9 (0x48) -#define MMX_WR10 (0x50) -#define MMX_WR11 (0x58) -#define MMX_WR12 (0x60) -#define MMX_WR13 (0x68) -#define MMX_WR14 (0x70) -#define MMX_WR15 (0x78) -#define MMX_WCSSF (0x80) -#define MMX_WCASF (0x84) -#define MMX_WCGR0 (0x88) -#define MMX_WCGR1 (0x8C) -#define MMX_WCGR2 (0x90) -#define MMX_WCGR3 (0x94) - -#define MMX_SIZE (0x98) - - .text - .arm - -/* - * Lazy switching of Concan coprocessor context - * - * r10 = struct thread_info pointer - * r9 = ret_from_exception - * lr = undefined instr exit - * - * called from prefetch exception handler with interrupts enabled - */ - -ENTRY(iwmmxt_task_enable) - inc_preempt_count r10, r3 - - XSC(mrc p15, 0, r2, c15, c1, 0) - PJ4(mrc p15, 0, r2, c1, c0, 2) - @ CP0 and CP1 accessible? - XSC(tst r2, #0x3) - PJ4(tst r2, #0xf) - bne 4f @ if so no business here - @ enable access to CP0 and CP1 - XSC(orr r2, r2, #0x3) - XSC(mcr p15, 0, r2, c15, c1, 0) - PJ4(orr r2, r2, #0xf) - PJ4(mcr p15, 0, r2, c1, c0, 2) - - ldr r3, =concan_owner - add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area - ldr r2, [sp, #60] @ current task pc value - ldr r1, [r3] @ get current Concan owner - str r0, [r3] @ this task now owns Concan regs - sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] - - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - bl concan_save - -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -4: dec_preempt_count r10, r3 - ret r9 @ normal exit from exception - -concan_save: - - teq r1, #0 @ test for last ownership - beq concan_load @ no owner, skip save - - tmrc r2, wCon - - @ CUP? 
wCx - tst r2, #0x1 - beq 1f - -concan_dump: - - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] - -1: @ MUP? wRn - tst r2, #0x2 - beq 2f - - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] - -2: teq r0, #0 @ anything to load? - reteq lr @ if not, return - -concan_load: - - @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] - - @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] - - @ clear CUP/MUP (only if r1 != 0) - teq r1, #0 - mov r2, #0 - reteq lr - - tmcr wCon, r2 - ret lr - -ENDPROC(iwmmxt_task_enable) - -/* - * Back up Concan regs to save area and disable access to them - * (mainly for gdb or sleep mode usage) - * - * r0 = struct thread_info pointer of target task or NULL for any - */ - -ENTRY(iwmmxt_task_disable) - - stmfd sp!, {r4, lr} - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r1, [r3] @ get current Concan owner - teq r1, #0 @ any current owner? - beq 1f @ no: quit - teq r0, #0 @ any owner? - teqne r1, r2 @ or specified one? - bne 1f @ no: quit - - @ enable access to CP0 and CP1 - XSC(mrc p15, 0, r4, c15, c1, 0) - XSC(orr r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(mrc p15, 0, r4, c1, c0, 2) - PJ4(orr r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) - - mov r0, #0 @ nothing to load - str r0, [r3] @ no more current owner - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - bl concan_save - - @ disable access to CP0 and CP1 - XSC(bic r4, r4, #0x3) - XSC(mcr p15, 0, r4, c15, c1, 0) - PJ4(bic r4, r4, #0xf) - PJ4(mcr p15, 0, r4, c1, c0, 2) - - mrc p15, 0, r2, c2, c0, 0 - mov r2, r2 @ cpwait - -1: msr cpsr_c, ip @ restore interrupt mode - ldmfd sp!, {r4, pc} - -ENDPROC(iwmmxt_task_disable) - -/* - * Copy Concan state to given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to store Concan state - * - * this is called mainly in the creation of signal stack frames - */ - -ENTRY(iwmmxt_task_copy) - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r3, [r3] @ get current Concan owner - teq r2, r3 @ does this task own it... 
- beq 1f - - @ current Concan values are in the task save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r1 - mov r1, r2 - mov r2, #MMX_SIZE - b memcpy - -1: @ this task owns Concan regs -- grab a copy from there - mov r0, #0 @ nothing to load - mov r2, #3 @ save all regs - mov r3, lr @ preserve return address - bl concan_dump - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -ENDPROC(iwmmxt_task_copy) - -/* - * Restore Concan state from given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to get Concan state from - * - * this is used to restore Concan state when unwinding a signal stack frame - */ - -ENTRY(iwmmxt_task_restore) - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =concan_owner - add r2, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r3, [r3] @ get current Concan owner - bic r2, r2, #0x7 @ 64-bit alignment - teq r2, r3 @ does this task own it... - beq 1f - - @ this task doesn't own Concan regs -- use its save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r2 - mov r2, #MMX_SIZE - b memcpy - -1: @ this task owns Concan regs -- load them directly - mov r0, r1 - mov r1, #0 @ don't clear CUP/MUP - mov r3, lr @ preserve return address - bl concan_load - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -ENDPROC(iwmmxt_task_restore) - -/* - * Concan handling on task switch - * - * r0 = next thread_info pointer - * - * Called only from the iwmmxt notifier with task preemption disabled. - */ -ENTRY(iwmmxt_task_switch) - - XSC(mrc p15, 0, r1, c15, c1, 0) - PJ4(mrc p15, 0, r1, c1, c0, 2) - @ CP0 and CP1 accessible? - XSC(tst r1, #0x3) - PJ4(tst r1, #0xf) - bne 1f @ yes: block them for next task - - ldr r2, =concan_owner - add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area - ldr r2, [r2] @ get current Concan owner - teq r2, r3 @ next task owns it? - retne lr @ no: leave Concan disabled - -1: @ flip Concan access - XSC(eor r1, r1, #0x3) - XSC(mcr p15, 0, r1, c15, c1, 0) - PJ4(eor r1, r1, #0xf) - PJ4(mcr p15, 0, r1, c1, c0, 2) - - mrc p15, 0, r1, c2, c0, 0 - sub pc, lr, r1, lsr #32 @ cpwait and return - -ENDPROC(iwmmxt_task_switch) - -/* - * Remove Concan ownership of given task - * - * r0 = struct thread_info pointer - */ -ENTRY(iwmmxt_task_release) - - mrs r2, cpsr - orr ip, r2, #PSR_I_BIT @ disable interrupts - msr cpsr_c, ip - ldr r3, =concan_owner - add r0, r0, #TI_IWMMXT_STATE @ get task Concan save area - ldr r1, [r3] @ get current Concan owner - eors r0, r0, r1 @ if equal... - streq r0, [r3] @ then clear ownership - msr cpsr_c, r2 @ restore interrupts - ret lr - -ENDPROC(iwmmxt_task_release) - - .data - .align 2 -concan_owner: - .word 0 - diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S deleted file mode 100644 index 5e15b5912cb05f74e65446ed84503f2ab4fd3d1e..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/relocate_kernel.S +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * relocate_kernel.S - put the kernel image in place to boot - */ - -#include -#include -#include -#include - - .align 3 /* not needed for this code, but keeps fncpy() happy */ - -ENTRY(relocate_new_kernel) - - adr r7, relocate_new_kernel_end - ldr r0, [r7, #KEXEC_INDIR_PAGE] - ldr r1, [r7, #KEXEC_START_ADDR] - - /* - * If there is no indirection page (we are doing crashdumps) - * skip any relocation. 
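The relocation loop that follows walks a kexec indirection list whose entry tags match the generic kexec IND_* flags. An illustrative C version of the same walk:

#include <stdint.h>
#include <string.h>

#define IND_DESTINATION 0x1     /* entry names the next destination page */
#define IND_INDIRECTION 0x2     /* entry points at the next indirection page */
#define IND_DONE        0x4     /* end of the list */
#define IND_SOURCE      0x8     /* entry is a source page to copy */

/* Walk the indirection list, copying each 4 KiB source page to the
 * current destination; a well-formed list sets a destination first. */
static void relocate(uint32_t *ind)
{
        uint8_t *dest = 0;
        uint32_t entry;

        while (!((entry = *ind++) & IND_DONE)) {
                if (entry & IND_DESTINATION)
                        dest = (uint8_t *)(uintptr_t)(entry & ~(uint32_t)IND_DESTINATION);
                else if (entry & IND_INDIRECTION)
                        ind = (uint32_t *)(uintptr_t)(entry & ~(uint32_t)IND_INDIRECTION);
                else if (entry & IND_SOURCE) {
                        memcpy(dest,
                               (void *)(uintptr_t)(entry & ~(uint32_t)IND_SOURCE),
                               4096);   /* 1024 words, as in the asm loop */
                        dest += 4096;
                }
        }
}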
- */
-	cmp	r0, #0
-	beq	2f
-
-0:	/* top, read another word for the indirection page */
-	ldr	r3, [r0],#4
-
-	/* Is it a destination page. Put destination address to r4 */
-	tst	r3,#1,0
-	beq	1f
-	bic	r4,r3,#1
-	b	0b
-1:
-	/* Is it an indirection page */
-	tst	r3,#2,0
-	beq	1f
-	bic	r0,r3,#2
-	b	0b
-1:
-
-	/* are we done ? */
-	tst	r3,#4,0
-	beq	1f
-	b	2f
-
-1:
-	/* is it source ? */
-	tst	r3,#8,0
-	beq	0b
-	bic	r3,r3,#8
-	mov	r6,#1024
-9:
-	ldr	r5,[r3],#4
-	str	r5,[r4],#4
-	subs	r6,r6,#1
-	bne	9b
-	b	0b
-
-2:
-	/* Jump to relocated kernel */
-	mov	lr, r1
-	mov	r0, #0
-	ldr	r1, [r7, #KEXEC_MACH_TYPE]
-	ldr	r2, [r7, #KEXEC_R2]
-	ARM(	ret	lr	)
-	THUMB(	bx	lr	)
-
-ENDPROC(relocate_new_kernel)
-
-	.align	3
-relocate_new_kernel_end:
-
-	.globl relocate_new_kernel_size
-relocate_new_kernel_size:
-	.long	relocate_new_kernel_end - relocate_new_kernel
-
-
diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S
deleted file mode 100644
index 7540ec51d16cdceb9f3ddf5f60b3293d373e9249..0000000000000000000000000000000000000000
--- a/arch/arm/kernel/sigreturn_codes.S
+++ /dev/null
@@ -1,140 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * sigreturn_codes.S - code snippets for sigreturn syscalls
- *
- * Created by: Victor Kamensky, 2013-08-13
- * Copyright: (C) 2013 Linaro Limited
- */
-
-#include
-#include
-#include
-
-/*
- * For ARM syscalls, we encode the syscall number into the instruction.
- * With EABI, the syscall number has to be loaded into r7. As a result,
- * the ARM syscall sequence snippet is a mov plus an svc in .arm encoding.
- *
- * For Thumb syscalls, we pass the syscall number via r7. We therefore
- * need two 16-bit instructions in .thumb encoding.
- *
- * Please note that the sigreturn_codes snippets are not executed in
- * place. Instead, they are copied by the kernel into appropriate places.
- * Code inside arch/arm/kernel/signal.c is very sensitive to the layout
- * of these code snippets.
- */
-
-/*
- * In the CPU_THUMBONLY case, kernel ARM opcodes are not allowed.
- * Note that in this case the code skips those instructions, but it
- * uses the .org directive to keep the correct layout of the
- * sigreturn_codes array.
- */
-#ifndef CONFIG_CPU_THUMBONLY
-#define ARM_OK(code...) code
-#else
-#define ARM_OK(code...)
-#endif
-
-	.macro arm_slot n
-	.org	sigreturn_codes + 12 * (\n)
-ARM_OK(	.arm	)
-	.endm
-
-	.macro thumb_slot n
-	.org	sigreturn_codes + 12 * (\n) + 8
-	.thumb
-	.endm
-
-	.macro arm_fdpic_slot n
-	.org	sigreturn_codes + 24 + 20 * (\n)
-ARM_OK(	.arm	)
-	.endm
-
-	.macro thumb_fdpic_slot n
-	.org	sigreturn_codes + 24 + 20 * (\n) + 12
-	.thumb
-	.endm
-
-
-#if __LINUX_ARM_ARCH__ <= 4
-	/*
-	 * Note we manually set the minimally required arch that supports
-	 * the required Thumb opcodes for early arch versions. It is OK
-	 * for this file to be used in combination with other
-	 * lower arch variants, since these code snippets are only
-	 * used as input data.
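The .org-based slot macros above pin each snippet at a fixed offset: ARM slot n starts at byte 12*n with its Thumb twin at 12*n + 8, and the FDPIC bounce slots start at byte 24 with a 20-byte stride. A hypothetical helper showing the resulting indexing (not the kernel's actual interface, which lives in signal.c):

#include <stddef.h>

extern const unsigned long sigreturn_codes[];

/* Return the address of the (sig)return snippet for a given slot:
 * slot 0 = sigreturn, slot 1 = rt_sigreturn; thumb selects the
 * Thumb twin 8 bytes into the 12-byte slot. Illustrative only. */
static const void *sigreturn_snippet(unsigned int slot, int thumb)
{
        size_t off = 12 * slot + (thumb ? 8 : 0);

        return (const char *)sigreturn_codes + off;
}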
- */ - .arch armv4t -#endif - - .section .rodata - .global sigreturn_codes - .type sigreturn_codes, #object - - .align - -sigreturn_codes: - - /* ARM sigreturn syscall code snippet */ - arm_slot 0 -ARM_OK( mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) ) -ARM_OK( swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) - - /* Thumb sigreturn syscall code snippet */ - thumb_slot 0 - movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) - swi #0 - - /* ARM sigreturn_rt syscall code snippet */ - arm_slot 1 -ARM_OK( mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) ) -ARM_OK( swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) - - /* Thumb sigreturn_rt syscall code snippet */ - thumb_slot 1 - movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) - swi #0 - - /* ARM sigreturn restorer FDPIC bounce code snippet */ - arm_fdpic_slot 0 -ARM_OK( ldr r3, [sp, #SIGFRAME_RC3_OFFSET] ) -ARM_OK( ldmia r3, {r3, r9} ) -#ifdef CONFIG_ARM_THUMB -ARM_OK( bx r3 ) -#else -ARM_OK( ret r3 ) -#endif - - /* Thumb sigreturn restorer FDPIC bounce code snippet */ - thumb_fdpic_slot 0 - ldr r3, [sp, #SIGFRAME_RC3_OFFSET] - ldmia r3, {r2, r3} - mov r9, r3 - bx r2 - - /* ARM sigreturn_rt restorer FDPIC bounce code snippet */ - arm_fdpic_slot 1 -ARM_OK( ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] ) -ARM_OK( ldmia r3, {r3, r9} ) -#ifdef CONFIG_ARM_THUMB -ARM_OK( bx r3 ) -#else -ARM_OK( ret r3 ) -#endif - - /* Thumb sigreturn_rt restorer FDPIC bounce code snippet */ - thumb_fdpic_slot 1 - ldr r3, [sp, #RT_SIGFRAME_RC3_OFFSET] - ldmia r3, {r2, r3} - mov r9, r3 - bx r2 - - /* - * Note on additional space: setup_return in signal.c - * always copies the same number of words regardless whether - * it is thumb case or not, so we need one additional padding - * word after the last entry. - */ - .space 4 - - .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S deleted file mode 100644 index 5dc8b80bb69383643eddec5ba62164e0458b4512..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/sleep.S +++ /dev/null @@ -1,190 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include -#include - .text - -/* - * Implementation of MPIDR hash algorithm through shifting - * and OR'ing. 
- * - * @dst: register containing hash result - * @rs0: register containing affinity level 0 bit shift - * @rs1: register containing affinity level 1 bit shift - * @rs2: register containing affinity level 2 bit shift - * @mpidr: register containing MPIDR value - * @mask: register containing MPIDR mask - * - * Pseudo C-code: - * - *u32 dst; - * - *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { - * u32 aff0, aff1, aff2; - * u32 mpidr_masked = mpidr & mask; - * aff0 = mpidr_masked & 0xff; - * aff1 = mpidr_masked & 0xff00; - * aff2 = mpidr_masked & 0xff0000; - * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); - *} - * Input registers: rs0, rs1, rs2, mpidr, mask - * Output register: dst - * Note: input and output registers must be disjoint register sets - (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) - */ - .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask - and \mpidr, \mpidr, \mask @ mask out MPIDR bits - and \dst, \mpidr, #0xff @ mask=aff0 - ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 - THUMB( lsr \dst, \dst, \rs0 ) - and \mask, \mpidr, #0xff00 @ mask = aff1 - ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) - THUMB( lsr \mask, \mask, \rs1 ) - THUMB( orr \dst, \dst, \mask ) - and \mask, \mpidr, #0xff0000 @ mask = aff2 - ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) - THUMB( lsr \mask, \mask, \rs2 ) - THUMB( orr \dst, \dst, \mask ) - .endm - -/* - * Save CPU state for a suspend. This saves the CPU general purpose - * registers, and allocates space on the kernel stack to save the CPU - * specific registers and some other data for resume. - * r0 = suspend function arg0 - * r1 = suspend function - * r2 = MPIDR value the resuming CPU will use - */ -ENTRY(__cpu_suspend) - stmfd sp!, {r4 - r11, lr} -#ifdef MULTI_CPU - ldr r10, =processor - ldr r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state -#else - ldr r4, =cpu_suspend_size -#endif - mov r5, sp @ current virtual SP - add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn - sub sp, sp, r4 @ allocate CPU state on stack - ldr r3, =sleep_save_sp - stmfd sp!, {r0, r1} @ save suspend func arg and pointer - ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) - ALT_UP_B(1f) - /* This ldmia relies on the memory layout of the mpidr_hash struct */ - ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts - compute_mpidr_hash r0, r6, r7, r8, r2, r1 - add r3, r3, r0, lsl #2 -1: mov r2, r5 @ virtual SP - mov r1, r4 @ size of save block - add r0, sp, #8 @ pointer to save block - bl __cpu_suspend_save - badr lr, cpu_suspend_abort - ldmfd sp!, {r0, pc} @ call suspend fn -ENDPROC(__cpu_suspend) - .ltorg - -cpu_suspend_abort: - ldmia sp!, {r1 - r3} @ pop phys pgd, virt SP, phys resume fn - teq r0, #0 - moveq r0, #1 @ force non-zero value - mov sp, r2 - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_suspend_abort) - -/* - * r0 = control register value - */ - .align 5 - .pushsection .idmap.text,"ax" -ENTRY(cpu_resume_mmu) - ldr r3, =cpu_resume_after_mmu - instr_sync - mcr p15, 0, r0, c1, c0, 0 @ turn on MMU, I-cache, etc - mrc p15, 0, r0, c0, c0, 0 @ read id reg - instr_sync - mov r0, r0 - mov r0, r0 - ret r3 @ jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: - bl cpu_init @ restore the und/abt/irq banked regs - mov r0, #0 @ return zero on success - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_resume_after_mmu) - - .text - .align - -#ifdef CONFIG_MCPM - .arm -THUMB( .thumb ) -ENTRY(cpu_resume_no_hyp) -ARM_BE8(setend be) @ ensure we are in BE 
mode - b no_hyp -#endif - -#ifdef CONFIG_MMU - .arm -ENTRY(cpu_resume_arm) - THUMB( badr r9, 1f ) @ Kernel is entered in ARM. - THUMB( bx r9 ) @ If this is a Thumb-2 kernel, - THUMB( .thumb ) @ switch to Thumb now. - THUMB(1: ) -#endif - -ENTRY(cpu_resume) -ARM_BE8(setend be) @ ensure we are in BE mode -#ifdef CONFIG_ARM_VIRT_EXT - bl __hyp_stub_install_secondary -#endif - safe_svcmode_maskall r1 -no_hyp: - mov r1, #0 - ALT_SMP(mrc p15, 0, r0, c0, c0, 5) - ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address - /* - * This ldmia relies on the memory layout of the mpidr_hash - * struct mpidr_hash. - */ - ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts - compute_mpidr_hash r1, r4, r5, r6, r0, r3 -1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] - ldr r0, [r0, r1, lsl #2] - - @ load phys pgd, stack, resume fn - ARM( ldmia r0!, {r1, sp, pc} ) -THUMB( ldmia r0!, {r1, r2, r3} ) -THUMB( mov sp, r2 ) -THUMB( bx r3 ) -ENDPROC(cpu_resume) - -#ifdef CONFIG_MMU -ENDPROC(cpu_resume_arm) -#endif -#ifdef CONFIG_MCPM -ENDPROC(cpu_resume_no_hyp) -#endif - - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - - .data - .align 2 - .type sleep_save_sp, #object -ENTRY(sleep_save_sp) - .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S deleted file mode 100644 index 00664c78facab321049bf7e7dfcdb380d15986d9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/smccc-call.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2015, Linaro Limited - */ -#include - -#include -#include -#include - - /* - * Wrap c macros in asm macros to delay expansion until after the - * SMCCC asm macro is expanded. 
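For context, a sketch of how a caller reaches the wrappers defined below, via the arm_smccc_smc() convenience macro from linux/arm-smccc.h; the function ID here is made up for the example:

#include <linux/arm-smccc.h>

/* Issue an SMC with seven zero arguments; results come back in
 * res.a0..a3, filled in by the stm in the SMCCC macro below. */
static unsigned long probe_firmware(void)
{
        struct arm_smccc_res res;

        arm_smccc_smc(0x8000ff00 /* hypothetical function ID */,
                      0, 0, 0, 0, 0, 0, 0, &res);
        return res.a0;
}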
- */ - .macro SMCCC_SMC - __SMC(0) - .endm - - .macro SMCCC_HVC - __HVC(0) - .endm - - .macro SMCCC instr -UNWIND( .fnstart) - mov r12, sp - push {r4-r7} -UNWIND( .save {r4-r7}) - ldm r12, {r4-r7} - \instr - pop {r4-r7} - ldr r12, [sp, #(4 * 4)] - stm r12, {r0-r3} - bx lr -UNWIND( .fnend) - .endm - -/* - * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_smc) - SMCCC SMCCC_SMC -ENDPROC(__arm_smccc_smc) - -/* - * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_hvc) - SMCCC SMCCC_HVC -ENDPROC(__arm_smccc_hvc) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S deleted file mode 100644 index 8c74037ade22958688e4766e4ef762a6cad514d9..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -/* No __ro_after_init data in the .rodata section - which will always be ro */ -#define RO_AFTER_INIT_DATA - -#include - -#include -#include -#include -#include -#include -#include - -#include "vmlinux.lds.h" - -OUTPUT_ARCH(arm) -ENTRY(stext) - -#ifndef __ARMEB__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - * - * unwind exit sections must be discarded before the rest of the - * unwind sections get included. - */ - /DISCARD/ : { - ARM_DISCARD - *(.alt.smp.init) - *(.pv_table) - } - - . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); - _xiprom = .; /* XIP ROM area to be mapped */ - - .head.text : { - _text = .; - HEAD_TEXT - } - - .text : { /* Real text segment */ - _stext = .; /* Text and read-only data */ - ARM_TEXT - } - - RO_DATA(PAGE_SIZE) - - . = ALIGN(4); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) - __stop___ex_table = .; - } - -#ifdef CONFIG_ARM_UNWIND - ARM_UNWIND_SECTIONS -#endif - - NOTES - - _etext = .; /* End of text and rodata section */ - - ARM_VECTORS - INIT_TEXT_SECTION(8) - .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) - } - .init.proc.info : { - ARM_CPU_DISCARD(PROC_INFO) - } - .init.arch.info : { - __arch_info_begin = .; - *(.arch.info.init) - __arch_info_end = .; - } - .init.tagtable : { - __tagtable_begin = .; - *(.taglist.init) - __tagtable_end = .; - } - .init.rodata : { - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - INIT_RAM_FS - } - -#ifdef CONFIG_ARM_MPU - . = ALIGN(SZ_128K); -#endif - _exiprom = .; /* End of XIP ROM area */ - -/* - * From this point, stuff is considered writable and will be copied to RAM - */ - __data_loc = ALIGN(4); /* location in file */ - . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */ -#undef LOAD_OFFSET -#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc) - - . 
= ALIGN(THREAD_SIZE); - _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { - *(.data..ro_after_init) - } - _edata = .; - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - ARM_EXIT_KEEP(EXIT_DATA) - } -#ifdef CONFIG_SMP - PERCPU_SECTION(L1_CACHE_BYTES) -#endif - -#ifdef CONFIG_HAVE_TCM - ARM_TCM -#endif - - /* - * End of copied data. We need a dummy section to get its LMA. - * Also located before final ALIGN() as trailing padding is not stored - * in the resulting binary file and useless to copy. - */ - .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } - _edata_loc = LOADADDR(.data.endmark); - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - BSS_SECTION(0, 0, 8) -#ifdef CONFIG_ARM_MPU - . = ALIGN(PMSAv8_MINALIGN); -#endif - _end = .; - - STABS_DEBUG -} - -/* - * These must never be empty - * If you have to comment these two assert statements out, your - * binutils is too old (for other reasons as well) - */ -ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") -ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") - -/* - * The HYP init code can't be more than a page long, - * and should not cross a page boundary. - * The above comment applies as well. - */ -ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, - "HYP init code too big or misaligned") - -#ifdef CONFIG_XIP_DEFLATED_DATA -/* - * The .bss is used as a stack area for __inflate_kernel_data() whose stack - * frame is 9568 bytes. Make sure it has extra room left. - */ -ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") -#endif - -#ifdef CONFIG_ARM_MPU -/* - * Due to PMSAv7 restriction on base address and size we have to - * enforce minimal alignment restrictions. It was seen that weaker - * alignment restriction on _xiprom will likely force XIP address - * space spawns multiple MPU regions thus it is likely we run in - * situation when we are reprogramming MPU region we run on with - * something which doesn't cover reprogramming code itself, so as soon - * as we update MPU settings we'd immediately try to execute straight - * from background region which is XN. - * It seem that alignment in 1M should suit most users. 
- * _exiprom is aligned as 1/8 of 1M so can be covered by subregion - * disable - */ -ASSERT(!(_xiprom & (SZ_1M - 1)), "XIP start address may cause MPU programming issues") -ASSERT(!(_exiprom & (SZ_128K - 1)), "XIP end address may cause MPU programming issues") -#endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S deleted file mode 100644 index 23150c0f0f4d4f81ec816798ab0415a3ac00d68b..0000000000000000000000000000000000000000 --- a/arch/arm/kernel/vmlinux.lds.S +++ /dev/null @@ -1,183 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -#ifdef CONFIG_XIP_KERNEL -#include "vmlinux-xip.lds.S" -#else - -#include -#include -#include -#include -#include -#include -#include - -#include "vmlinux.lds.h" - -OUTPUT_ARCH(arm) -ENTRY(stext) - -#ifndef __ARMEB__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - * - * unwind exit sections must be discarded before the rest of the - * unwind sections get included. - */ - /DISCARD/ : { - ARM_DISCARD -#ifndef CONFIG_SMP_ON_UP - *(.alt.smp.init) -#endif - } - - . = PAGE_OFFSET + TEXT_OFFSET; - .head.text : { - _text = .; - HEAD_TEXT - } - -#ifdef CONFIG_STRICT_KERNEL_RWX - . = ALIGN(1< -*/ - -#include -#include -#include -#include - - .arch_extension virt - - .text - .pushsection .hyp.text, "ax" - -#define USR_REGS_OFFSET (CPU_CTXT_GP_REGS + GP_REGS_USR) - -/* int __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host) */ -ENTRY(__guest_enter) - @ Save host registers - add r1, r1, #(USR_REGS_OFFSET + S_R4) - stm r1!, {r4-r12} - str lr, [r1, #4] @ Skip SP_usr (already saved) - - @ Restore guest registers - add r0, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) - ldr lr, [r0, #S_LR] - ldm r0, {r0-r12} - - clrex - eret -ENDPROC(__guest_enter) - -ENTRY(__guest_exit) - /* - * return convention: - * guest r0, r1, r2 saved on the stack - * r0: vcpu pointer - * r1: exception code - */ - - add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R3) - stm r2!, {r3-r12} - str lr, [r2, #4] - add r2, r0, #(VCPU_GUEST_CTXT + USR_REGS_OFFSET + S_R0) - pop {r3, r4, r5} @ r0, r1, r2 - stm r2, {r3-r5} - - ldr r0, [r0, #VCPU_HOST_CTXT] - add r0, r0, #(USR_REGS_OFFSET + S_R4) - ldm r0!, {r4-r12} - ldr lr, [r0, #4] - - mov r0, r1 - mrs r1, SPSR - mrs r2, ELR_hyp - mrc p15, 4, r3, c5, c2, 0 @ HSR - - /* - * Force loads and stores to complete before unmasking aborts - * and forcing the delivery of the exception. This gives us a - * single instruction window, which the handler will try to - * match. - */ - dsb sy - cpsie a - - .global abort_guest_exit_start -abort_guest_exit_start: - - isb - - .global abort_guest_exit_end -abort_guest_exit_end: - - /* - * If we took an abort, r0[31] will be set, and cmp will set - * the N bit in PSTATE. - */ - cmp r0, #0 - msrmi SPSR_cxsf, r1 - msrmi ELR_hyp, r2 - mcrmi p15, 4, r3, c5, c2, 0 @ HSR - - bx lr -ENDPROC(__guest_exit) - -/* - * If VFPv3 support is not available, then we will not switch the VFP - * registers; however cp10 and cp11 accesses will still trap and fallback - * to the regular coprocessor emulation code, which currently will - * inject an undefined exception to the guest. 
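A hedged sketch of the lazy switch that __vfp_guest_restore (below) implements: VFP state is left untouched at guest entry and only swapped once the guest's first cp10/cp11 access traps. The struct name and the C framing are illustrative:

struct vfp_ctx;                         /* stands in for struct vfp_hard_struct */
extern void __vfp_save_state(struct vfp_ctx *);
extern void __vfp_restore_state(struct vfp_ctx *);

/* Called from the trap path, after the HCPTR cp10/cp11 trap bits have
 * been cleared so VFP accesses no longer fault. */
static void vfp_switch_to_guest(struct vfp_ctx *host, struct vfp_ctx *guest)
{
        __vfp_save_state(host);         /* park the host's VFP/NEON state */
        __vfp_restore_state(guest);     /* then load the guest's */
}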
- */ -#ifdef CONFIG_VFPv3 -ENTRY(__vfp_guest_restore) - push {r3, r4, lr} - - @ NEON/VFP used. Turn on VFP access. - mrc p15, 4, r1, c1, c1, 2 @ HCPTR - bic r1, r1, #(HCPTR_TCP(10) | HCPTR_TCP(11)) - mcr p15, 4, r1, c1, c1, 2 @ HCPTR - isb - - @ Switch VFP/NEON hardware state to the guest's - mov r4, r0 - ldr r0, [r0, #VCPU_HOST_CTXT] - add r0, r0, #CPU_CTXT_VFP - bl __vfp_save_state - add r0, r4, #(VCPU_GUEST_CTXT + CPU_CTXT_VFP) - bl __vfp_restore_state - - pop {r3, r4, lr} - pop {r0, r1, r2} - clrex - eret -ENDPROC(__vfp_guest_restore) -#endif - - .popsection - diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S deleted file mode 100644 index fe3d7811a908c73de535eef289cbae2cd1a9cc02..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ /dev/null @@ -1,295 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include - - .arch_extension virt - - .text - .pushsection .hyp.text, "ax" - -.macro load_vcpu reg - mrc p15, 4, \reg, c13, c0, 2 @ HTPIDR -.endm - -/******************************************************************** - * Hypervisor exception vector and handlers - * - * - * The KVM/ARM Hypervisor ABI is defined as follows: - * - * Entry to Hyp mode from the host kernel will happen _only_ when an HVC - * instruction is issued since all traps are disabled when running the host - * kernel as per the Hyp-mode initialization at boot time. - * - * HVC instructions cause a trap to the vector page + offset 0x14 (see hyp_hvc - * below) when the HVC instruction is called from SVC mode (i.e. a guest or the - * host kernel) and they cause a trap to the vector page + offset 0x8 when HVC - * instructions are called from within Hyp-mode. - * - * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode): - * Switching to Hyp mode is done through a simple HVC #0 instruction. The - * exception vector code will check that the HVC comes from VMID==0. - * - r0 contains a pointer to a HYP function - * - r1, r2, and r3 contain arguments to the above function. - * - The HYP function will be called with its arguments in r0, r1 and r2. - * On HYP function return, we return directly to SVC. - * - * Note that the above is used to execute code in Hyp-mode from a host-kernel - * point of view, and is a different concept from performing a world-switch and - * executing guest code SVC mode (with a VMID != 0). - */ - - .align 5 -__kvm_hyp_vector: - .global __kvm_hyp_vector - - @ Hyp-mode exception vector - W(b) hyp_reset - W(b) hyp_undef - W(b) hyp_svc - W(b) hyp_pabt - W(b) hyp_dabt - W(b) hyp_hvc - W(b) hyp_irq - W(b) hyp_fiq - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - .align 5 -__kvm_hyp_vector_ic_inv: - .global __kvm_hyp_vector_ic_inv - - /* - * We encode the exception entry in the bottom 3 bits of - * SP, and we have to guarantee to be 8 bytes aligned. - */ - W(add) sp, sp, #1 /* Reset 7 */ - W(add) sp, sp, #1 /* Undef 6 */ - W(add) sp, sp, #1 /* Syscall 5 */ - W(add) sp, sp, #1 /* Prefetch abort 4 */ - W(add) sp, sp, #1 /* Data abort 3 */ - W(add) sp, sp, #1 /* HVC 2 */ - W(add) sp, sp, #1 /* IRQ 1 */ - W(nop) /* FIQ 0 */ - - mcr p15, 0, r0, c7, c5, 0 /* ICIALLU */ - isb - - b decode_vectors - - .align 5 -__kvm_hyp_vector_bp_inv: - .global __kvm_hyp_vector_bp_inv - - /* - * We encode the exception entry in the bottom 3 bits of - * SP, and we have to guarantee to be 8 bytes aligned. 
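The add-to-SP slots above and below implement a branchless way of recording which vector fired: an exception enters the table at slot k and falls through the remaining `add sp, sp, #1` instructions, so on an 8-byte-aligned stack the low three bits of SP encode the exception. A small sketch of the decode, matching the vect_br values used later:

/* Vector numbers as encoded by the fall-through adds (FIQ executes only
 * the final nop, Reset executes all seven adds). */
enum hyp_vec {
        VEC_FIQ = 0, VEC_IRQ, VEC_HVC, VEC_DABT,
        VEC_PABT, VEC_SVC, VEC_UNDEF, VEC_RESET
};

static enum hyp_vec decode_entry(unsigned long sp)
{
        return (enum hyp_vec)(sp & 7);  /* low bits added by the fall-through */
}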
- */ - W(add) sp, sp, #1 /* Reset 7 */ - W(add) sp, sp, #1 /* Undef 6 */ - W(add) sp, sp, #1 /* Syscall 5 */ - W(add) sp, sp, #1 /* Prefetch abort 4 */ - W(add) sp, sp, #1 /* Data abort 3 */ - W(add) sp, sp, #1 /* HVC 2 */ - W(add) sp, sp, #1 /* IRQ 1 */ - W(nop) /* FIQ 0 */ - - mcr p15, 0, r0, c7, c5, 6 /* BPIALL */ - isb - -decode_vectors: - -#ifdef CONFIG_THUMB2_KERNEL - /* - * Yet another silly hack: Use VPIDR as a temp register. - * Thumb2 is really a pain, as SP cannot be used with most - * of the bitwise instructions. The vect_br macro ensures - * things gets cleaned-up. - */ - mcr p15, 4, r0, c0, c0, 0 /* VPIDR */ - mov r0, sp - and r0, r0, #7 - sub sp, sp, r0 - push {r1, r2} - mov r1, r0 - mrc p15, 4, r0, c0, c0, 0 /* VPIDR */ - mrc p15, 0, r2, c0, c0, 0 /* MIDR */ - mcr p15, 4, r2, c0, c0, 0 /* VPIDR */ -#endif - -.macro vect_br val, targ -ARM( eor sp, sp, #\val ) -ARM( tst sp, #7 ) -ARM( eorne sp, sp, #\val ) - -THUMB( cmp r1, #\val ) -THUMB( popeq {r1, r2} ) - - beq \targ -.endm - - vect_br 0, hyp_fiq - vect_br 1, hyp_irq - vect_br 2, hyp_hvc - vect_br 3, hyp_dabt - vect_br 4, hyp_pabt - vect_br 5, hyp_svc - vect_br 6, hyp_undef - vect_br 7, hyp_reset -#endif - -.macro invalid_vector label, cause - .align -\label: mov r0, #\cause - b __hyp_panic -.endm - - invalid_vector hyp_reset ARM_EXCEPTION_RESET - invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED - invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE - invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT - invalid_vector hyp_fiq ARM_EXCEPTION_FIQ - -ENTRY(__hyp_do_panic) - mrs lr, cpsr - bic lr, lr, #MODE_MASK - orr lr, lr, #SVC_MODE -THUMB( orr lr, lr, #PSR_T_BIT ) - msr spsr_cxsf, lr - ldr lr, =panic - msr ELR_hyp, lr - ldr lr, =__kvm_call_hyp - clrex - eret -ENDPROC(__hyp_do_panic) - -hyp_hvc: - /* - * Getting here is either because of a trap from a guest, - * or from executing HVC from the host kernel, which means - * "do something in Hyp mode". - */ - push {r0, r1, r2} - - @ Check syndrome register - mrc p15, 4, r1, c5, c2, 0 @ HSR - lsr r0, r1, #HSR_EC_SHIFT - cmp r0, #HSR_EC_HVC - bne guest_trap @ Not HVC instr. - - /* - * Let's check if the HVC came from VMID 0 and allow simple - * switch to Hyp mode - */ - mrrc p15, 6, r0, r2, c2 - lsr r2, r2, #16 - and r2, r2, #0xff - cmp r2, #0 - bne guest_hvc_trap @ Guest called HVC - - /* - * Getting here means host called HVC, we shift parameters and branch - * to Hyp function. - */ - pop {r0, r1, r2} - - /* - * Check if we have a kernel function, which is guaranteed to be - * bigger than the maximum hyp stub hypercall - */ - cmp r0, #HVC_STUB_HCALL_NR - bhs 1f - - /* - * Not a kernel function, treat it as a stub hypercall. - * Compute the physical address for __kvm_handle_stub_hvc - * (as the code lives in the idmaped page) and branch there. - * We hijack ip (r12) as a tmp register. - */ - push {r1} - ldr r1, =kimage_voffset - ldr r1, [r1] - ldr ip, =__kvm_handle_stub_hvc - sub ip, ip, r1 - pop {r1} - - bx ip - -1: - /* - * Pushing r2 here is just a way of keeping the stack aligned to - * 8 bytes on any path that can trigger a HYP exception. Here, - * we may well be about to jump into the guest, and the guest - * exit would otherwise be badly decoded by our fancy - * "decode-exception-without-a-branch" code... 
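The host-call path dispatched just below follows the Hyp-ABI described earlier: HVC #0 with r0 holding a pointer to a HYP function and r1-r3 its arguments, which the stub shifts down into r0-r2 before branching. A C rendering of that calling convention; illustrative only, since the real transition is the HVC trap, not a direct call:

typedef unsigned long (*hyp_fn_t)(unsigned long, unsigned long, unsigned long);

/* Host-side view of "call a function in HYP mode": the arguments the
 * caller puts in r1..r3 arrive in the HYP function as r0..r2. */
static unsigned long call_hyp(hyp_fn_t fn, unsigned long a,
                              unsigned long b, unsigned long c)
{
        return fn(a, b, c);             /* stands in for HVC #0 */
}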
- */ - push {r2, lr} - - mov lr, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - -THUMB( orr lr, #1) - blx lr @ Call the HYP function - - pop {r2, lr} - eret - -guest_hvc_trap: - movw r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - ldr r0, [sp] @ Guest's r0 - teq r0, r2 - bne guest_trap - add sp, sp, #12 - @ Returns: - @ r0 = 0 - @ r1 = HSR value (perfectly predictable) - @ r2 = ARM_SMCCC_ARCH_WORKAROUND_1 - mov r0, #0 - eret - -guest_trap: - load_vcpu r0 @ Load VCPU pointer to r0 - -#ifdef CONFIG_VFPv3 - @ Check for a VFP access - lsr r1, r1, #HSR_EC_SHIFT - cmp r1, #HSR_EC_CP_0_13 - beq __vfp_guest_restore -#endif - - mov r1, #ARM_EXCEPTION_HVC - b __guest_exit - -hyp_irq: - push {r0, r1, r2} - mov r1, #ARM_EXCEPTION_IRQ - load_vcpu r0 @ Load VCPU pointer to r0 - b __guest_exit - -hyp_dabt: - push {r0, r1} - mrs r0, ELR_hyp - ldr r1, =abort_guest_exit_start -THUMB( add r1, r1, #1) - cmp r0, r1 - ldrne r1, =abort_guest_exit_end -THUMB( addne r1, r1, #1) - cmpne r0, r1 - pop {r0, r1} - bne __hyp_panic - - orr r0, r0, #(1 << ARM_EXIT_WITH_ABORT_BIT) - eret - - .ltorg - - .popsection diff --git a/arch/arm/kvm/hyp/vfp.S b/arch/arm/kvm/hyp/vfp.S deleted file mode 100644 index 675a52348d8dc11d7a5e724aca7441af1fef8282..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/hyp/vfp.S +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* void __vfp_save_state(struct vfp_hard_struct *vfp); */ -ENTRY(__vfp_save_state) - push {r4, r5} - VFPFMRX r1, FPEXC - - @ Make sure VFP is *really* enabled so we can touch the registers. - orr r5, r1, #FPEXC_EN - tst r5, #FPEXC_EX @ Check for VFP Subarchitecture - bic r5, r5, #FPEXC_EX @ FPEXC_EX disable - VFPFMXR FPEXC, r5 - isb - - VFPFMRX r2, FPSCR - beq 1f - - @ If FPEXC_EX is 0, then FPINST/FPINST2 reads are unpredictable, so - @ we only need to save them if FPEXC_EX is set.
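In C terms, the conditional save below mirrors this sketch (field names follow struct vfp_hard_struct; the hw struct is a hypothetical stand-in for the coprocessor registers that VFPFMRX would read):

#include <stdint.h>
#include <stdio.h>

#define FPEXC_EN   (1u << 30)  /* VFP enable */
#define FPEXC_EX   (1u << 31)  /* exceptional state pending */
#define FPEXC_FP2V (1u << 28)  /* FPINST2 valid */

struct vfp_hw_model   { uint32_t fpexc, fpscr, fpinst, fpinst2; };
struct vfp_ctxt_model { uint32_t fpexc, fpscr, fpinst, fpinst2; };

static void vfp_save_model(struct vfp_ctxt_model *c,
                           const struct vfp_hw_model *hw)
{
    c->fpexc = hw->fpexc;                /* always saved */
    c->fpscr = hw->fpscr;
    if (hw->fpexc & FPEXC_EX) {          /* sub-architecture state live? */
        c->fpinst = hw->fpinst;
        if (hw->fpexc & FPEXC_FP2V)      /* FPINST2 only if marked valid */
            c->fpinst2 = hw->fpinst2;
    }
}

int main(void)
{
    struct vfp_hw_model hw = { FPEXC_EN, 0x03000000u, 0, 0 };
    struct vfp_ctxt_model c = { 0, 0, 0, 0 };
    vfp_save_model(&c, &hw);             /* EX clear: FPINST* untouched */
    printf("fpexc=%#x fpscr=%#x\n", (unsigned)c.fpexc, (unsigned)c.fpscr);
    return 0;
}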
- VFPFMRX r3, FPINST - tst r5, #FPEXC_FP2V - VFPFMRX r4, FPINST2, ne @ vmrsne -1: - VFPFSTMIA r0, r5 @ Save VFP registers - stm r0, {r1-r4} @ Save FPEXC, FPSCR, FPINST, FPINST2 - pop {r4, r5} - bx lr -ENDPROC(__vfp_save_state) - -/* void __vfp_restore_state(struct vfp_hard_struct *vfp); - * Assume FPEXC_EN is on and FPEXC_EX is off */ -ENTRY(__vfp_restore_state) - VFPFLDMIA r0, r1 @ Load VFP registers - ldm r0, {r0-r3} @ Load FPEXC, FPSCR, FPINST, FPINST2 - - VFPFMXR FPSCR, r1 - tst r0, #FPEXC_EX @ Check for VFP Subarchitecture - beq 1f - VFPFMXR FPINST, r2 - tst r0, #FPEXC_FP2V - VFPFMXR FPINST2, r3, ne -1: - VFPFMXR FPEXC, r0 @ FPEXC (last, in case !EN) - bx lr -ENDPROC(__vfp_restore_state) - - .popsection diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S deleted file mode 100644 index 33e34b6d24b24e23b872deaabe0cbe80d3271b3f..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/init.S +++ /dev/null @@ -1,157 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/******************************************************************** - * Hypervisor initialization - * - should be called with: - * r0 = top of Hyp stack (kernel VA) - * r1 = pointer to hyp vectors - * r2,r3 = Hypervisor pgd pointer - * - * The init scenario is: - * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack, - * runtime vectors - * - Invalidate TLBs - * - Set stack and vectors - * - Set up the page tables - * - Enable the MMU - * - Profit! (or eret, if you only care about the code). - * - * Another possibility is to get a HYP stub hypercall. - * We discriminate between the two by checking if r0 contains a value - * that is less than HVC_STUB_HCALL_NR. - */ - - .text - .pushsection .hyp.idmap.text,"ax" - .align 5 -__kvm_hyp_init: - .globl __kvm_hyp_init - - @ Hyp-mode exception vector - W(b) . - W(b) . - W(b) . - W(b) . - W(b) . - W(b) __do_hyp_init - W(b) . - W(b) . - -__do_hyp_init: - @ Check for a stub hypercall - cmp r0, #HVC_STUB_HCALL_NR - blo __kvm_handle_stub_hvc - - @ Set stack pointer - mov sp, r0 - - @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - - @ Set the HTTBR to point to the hypervisor PGD pointer passed - mcrr p15, 4, rr_lo_hi(r2, r3), c2 - - @ Set the HTCR and VTCR to the same shareability and cacheability - @ settings as the non-secure TTBCR and with T0SZ == 0. - mrc p15, 4, r0, c2, c0, 2 @ HTCR - ldr r2, =HTCR_MASK - bic r0, r0, r2 - mrc p15, 0, r1, c2, c0, 2 @ TTBCR - and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ) - orr r0, r0, r1 - mcr p15, 4, r0, c2, c0, 2 @ HTCR - - @ Use the same memory attributes for hyp. accesses as the kernel - @ (copy MAIRx to HMAIRx).
- mrc p15, 0, r0, c10, c2, 0 - mcr p15, 4, r0, c10, c2, 0 - mrc p15, 0, r0, c10, c2, 1 - mcr p15, 4, r0, c10, c2, 1 - - @ Invalidate the stale TLBs from Bootloader - mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb ish - - @ Set the HSCTLR to: - @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel) - @ - Endianness: Kernel config - @ - Fast Interrupt Features: Kernel config - @ - Write permission implies XN: disabled - @ - Instruction cache: enabled - @ - Data/Unified cache: enabled - @ - MMU: enabled (this code must be run from an identity mapping) - mrc p15, 4, r0, c1, c0, 0 @ HSCR - ldr r2, =HSCTLR_MASK - bic r0, r0, r2 - mrc p15, 0, r1, c1, c0, 0 @ SCTLR - ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C) - and r1, r1, r2 - ARM( ldr r2, =(HSCTLR_M) ) - THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) ) - orr r1, r1, r2 - orr r0, r0, r1 - mcr p15, 4, r0, c1, c0, 0 @ HSCR - isb - - eret - -ENTRY(__kvm_handle_stub_hvc) - cmp r0, #HVC_SOFT_RESTART - bne 1f - - /* The target is expected in r1 */ - msr ELR_hyp, r1 - mrs r0, cpsr - bic r0, r0, #MODE_MASK - orr r0, r0, #HYP_MODE -THUMB( orr r0, r0, #PSR_T_BIT ) - msr spsr_cxsf, r0 - b reset - -1: cmp r0, #HVC_RESET_VECTORS - bne 1f - -reset: - /* We're now in idmap, disable MMU */ - mrc p15, 4, r1, c1, c0, 0 @ HSCTLR - ldr r0, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) - bic r1, r1, r0 - mcr p15, 4, r1, c1, c0, 0 @ HSCTLR - - /* - * Install stub vectors, using ardb's VA->PA trick. - */ -0: adr r0, 0b @ PA(0) - movw r1, #:lower16:__hyp_stub_vectors - 0b @ VA(stub) - VA(0) - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r1, r1, r0 @ PA(stub) - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - b exit - -1: ldr r0, =HVC_STUB_ERR - eret - -exit: - mov r0, #0 - eret -ENDPROC(__kvm_handle_stub_hvc) - - .ltorg - - .globl __kvm_hyp_init_end -__kvm_hyp_init_end: - - .popsection diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S deleted file mode 100644 index 064f4f118ca73d0ff49a0e00185d5e985a1817a1..0000000000000000000000000000000000000000 --- a/arch/arm/kvm/interrupts.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall - */ - -#include - - .text - -/******************************************************************** - * Call function in Hyp mode - * - * - * unsigned long kvm_call_hyp(void *hypfn, ...); - * - * This is not really a variadic function in the classic C-way and care must - * be taken when calling this to ensure parameters are passed in registers - * only, since the stack will change between the caller and the callee. - * - * Call the function with the first argument containing a pointer to the - * function you wish to call in Hyp mode, and subsequent arguments will be - * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the - * function pointer can be passed). The function being called must be mapped - * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 (strictly 32bit). 
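A caller-side sketch of this ABI (__my_hyp_fn is hypothetical; in-tree callers pass functions such as __kvm_vcpu_run that live in the .hyp.text section and are mapped into Hyp mode):

extern unsigned long kvm_call_hyp(void *hypfn, ...);

/* Hypothetical Hyp-mode function; it must be mapped in Hyp mode
 * (see init_hyp_mode in arch/arm/kvm/arm.c). */
unsigned long __my_hyp_fn(unsigned long a, unsigned long b, unsigned long c);

static unsigned long example_hyp_call(void)
{
    /* r0 = function pointer, r1-r3 = up to three register-sized args;
     * the HVC #0 trap lands in hyp_hvc, which shifts r1-r3 down to
     * r0-r2 before branching to the function. */
    return kvm_call_hyp((void *)__my_hyp_fn, 1UL, 2UL, 3UL);
}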
- * - * The calling convention follows the standard AAPCS: - * r0 - r3: caller save - * r12: caller save - * rest: callee save - */ -ENTRY(__kvm_call_hyp) - hvc #0 - bx lr -ENDPROC(__kvm_call_hyp) diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S deleted file mode 100644 index b05e95840651d0f5acc653a1efaeecd5c85b18fe..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ashldi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__ashldi3) -ENTRY(__aeabi_llsl) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - ARM( orrmi ah, ah, al, lsr ip ) - THUMB( lsrmi r3, al, ip ) - THUMB( orrmi ah, ah, r3 ) - mov al, al, lsl r2 - ret lr - -ENDPROC(__ashldi3) -ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S deleted file mode 100644 index 275d7d2341a4e52e31e19924ebde00aaa771a49c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ashrdi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. 
If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__ashrdi3) -ENTRY(__aeabi_lasr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - ARM( orrmi al, al, ah, lsl ip ) - THUMB( lslmi r3, ah, ip ) - THUMB( orrmi al, al, r3 ) - mov ah, ah, asr r2 - ret lr - -ENDPROC(__ashrdi3) -ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S deleted file mode 100644 index 2ff375144b55b0347240441205f476aae1a837cc..0000000000000000000000000000000000000000 --- a/arch/arm/lib/backtrace-clang.S +++ /dev/null @@ -1,217 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/backtrace-clang.S - * - * Copyright (C) 2019 Nathan Huckleberry - * - */ -#include -#include -#include - .text - -/* fp is 0 or stack frame */ - -#define frame r4 -#define sv_fp r5 -#define sv_pc r6 -#define mask r7 -#define sv_lr r8 - -ENTRY(c_backtrace) - -#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - ret lr -ENDPROC(c_backtrace) -#else - - -/* - * Clang does not store pc or sp in function prologues so we don't know exactly - * where the function starts. - * - * We can treat the current frame's lr as the saved pc and the preceding - * frame's lr as the current frame's lr, but we can't trace the most recent - * call. Inserting a false stack frame allows us to reference the function - * called last in the stacktrace. - * - * If the call instruction was a bl we can look at the caller's branch - * instruction to calculate the saved pc. We can recover the pc in most cases, - * but in cases such as calling function pointers we cannot. In this case, - * default to using the lr. This will be some address in the function, but will - * not be the function start. - * - * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these - * are less frequently saved. - * - * Stack frame layout: - * - * saved lr - * frame=> saved fp - * optionally saved caller registers (r4 - r10) - * optionally saved arguments (r0 - r3) - * - * - * - * Functions start with the following code sequence: - * corrected pc => stmfd sp!, {..., fp, lr} - * add fp, sp, #x - * stmfd sp!, {r0 - r3} (optional) - * - * - * - * - * - * - * The diagram below shows an example stack setup for dump_stack. - * - * The frame for c_backtrace has pointers to the code of dump_stack. This is - * why the frame of c_backtrace is used for the pc calculation of - * dump_stack. This is why we must move back a frame to print dump_stack. - * - * The stored locals for dump_stack are in dump_stack's frame. This means that - * to fully print dump_stack's frame we need both the frame for dump_stack (for - * locals) and the frame that was called by dump_stack (for pc). - * - * To print locals we must know where the function start is. If we read the - * function prologue opcodes we can determine which variables are stored in the - * stack frame. - * - * To find the function start of dump_stack we can look at the stored LR of - * show_stack. It points at the instruction directly after the bl dump_stack. - * We can then read the offset from the bl opcode to determine where the branch - * takes us. The address calculated must be the start of dump_stack. - * - * c_backtrace frame dump_stack: - * {[LR] } ============| ... - * {[FP] } =======| | bl c_backtrace - * | |=> ...
- {[R4-R10]} | - {[R0-R3] } | show_stack: - dump_stack frame | ... - {[LR] } =============| bl dump_stack - {[FP] } <=======| |=> ... - {[R4-R10]} - {[R0-R3] } - */ - - stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register - @ to ensure 8 byte alignment - movs frame, r0 @ if frame pointer is zero - beq no_frame @ we have no stack frames - tst r1, #0x10 @ 26 or 32-bit mode? - moveq mask, #0xfc000003 - movne mask, #0 @ mask for 32-bit - -/* - * Switches the current frame to be the frame for dump_stack. - */ - add frame, sp, #24 @ switch to false frame -for_each_frame: tst frame, mask @ Check for address exceptions - bne no_frame - -/* - * sv_fp is the stack frame with the locals for the currently considered - * function. - * - * sv_pc is the saved lr from the frame above. This is a pointer to a code - * address within the currently considered function, but it is not the function - * start. This value gets updated to be the function start later if it is - * possible. - */ -1001: ldr sv_pc, [frame, #4] @ get saved 'pc' -1002: ldr sv_fp, [frame, #0] @ get saved fp - - teq sv_fp, mask @ make sure next frame exists - beq no_frame - -/* - * sv_lr is the lr from the function that called the current function. This is - * a pointer to a code address in the current function's caller. sv_lr-4 is - * the instruction used to call the current function. - * - * This sv_lr can be used to calculate the function start if the function was - * called using a bl instruction. If the function start can be recovered sv_pc - * is overwritten with the function start. - * - * If the current function was called using a function pointer we cannot - * recover the function start and instead continue with sv_pc as an arbitrary - * value within the current function. If this is the case we cannot print - * registers for the current function, but the stacktrace is still printed - * properly. - */ -1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame - - ldr r0, [sv_lr, #-4] @ get call instruction - ldr r3, .Lopcode+4 - and r2, r3, r0 @ is this a bl call - teq r2, r3 - bne finished_setup @ give up if it's not - and r0, #0xffffff @ get call offset 24-bit int - lsl r0, r0, #8 @ sign extend offset - asr r0, r0, #8 - ldr sv_pc, [sv_fp, #4] @ get lr address - add sv_pc, sv_pc, #-4 @ get call instruction address - add sv_pc, sv_pc, #8 @ take care of prefetch - add sv_pc, sv_pc, r0, lsl #2@ find function start - -finished_setup: - - bic sv_pc, sv_pc, mask @ mask PC/LR for the mode - -/* - * Print the function (sv_pc) and where it was called from (sv_lr). - */ -1004: mov r0, sv_pc - - mov r1, sv_lr - mov r2, frame - bic r1, r1, mask @ mask PC/LR for the mode - bl dump_backtrace_entry - -/* - * Test if the function start is a stmfd instruction to determine which - * registers were stored in the function prologue. - * - * If we could not recover the sv_pc because we were called through a function - * pointer the comparison will fail and no registers will print. Unwinding will - * continue as if there had been no registers stored in this frame. - */ -1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr} - ldr r3, .Lopcode @ instruction exists, - teq r3, r1, lsr #11 - ldr r0, [frame] @ locals are stored in - @ the preceding frame - subeq r0, r0, #4 - bleq dump_backtrace_stm @ dump saved registers - -/* - * If we are out of frames or if the next frame is invalid.
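The 24-bit offset arithmetic at 1003 above, written out in C (a hedged model, not kernel code; sv_lr - 4 is the address of the caller's bl):

#include <stdint.h>
#include <stdio.h>

/* bl encodes a signed 24-bit word offset, taken relative to the bl's
 * address + 8 (ARM prefetch). */
static uint32_t bl_target(uint32_t bl_addr, uint32_t insn)
{
    int32_t imm24 = (int32_t)(insn << 8) >> 8; /* lsl #8 then asr #8 */
    return bl_addr + 8 + ((uint32_t)imm24 << 2);
}

int main(void)
{
    uint32_t insn = 0xebfffffe;                /* "bl ." (offset -2 words) */
    printf("%#x\n", bl_target(0x8010u, insn)); /* prints 0x8010 */
    return 0;
}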
- */ - teq sv_fp, #0 @ zero saved fp means - beq no_frame @ no further frames - - cmp sv_fp, frame @ next frame must be - mov frame, sv_fp @ above the current frame - bhi for_each_frame - -1006: adr r0, .Lbad - mov r1, frame - bl printk -no_frame: ldmfd sp!, {r4 - r9, fp, pc} -ENDPROC(c_backtrace) - .pushsection __ex_table,"a" - .align 3 - .long 1001b, 1006b - .long 1002b, 1006b - .long 1003b, 1006b - .long 1004b, 1006b - .long 1005b, 1006b - .popsection - -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" - .align -.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} - .word 0x0b000000 @ bl if these bits are set - -#endif diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S deleted file mode 100644 index 582925238d65ea261cc126078462ce2a66421549..0000000000000000000000000000000000000000 --- a/arch/arm/lib/backtrace.S +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/backtrace.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include - .text - -@ fp is 0 or stack frame - -#define frame r4 -#define sv_fp r5 -#define sv_pc r6 -#define mask r7 -#define offset r8 - -ENTRY(c_backtrace) - -#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - ret lr -ENDPROC(c_backtrace) -#else - stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... - movs frame, r0 @ if frame pointer is zero - beq no_frame @ we have no stack frames - - tst r1, #0x10 @ 26 or 32-bit mode? - ARM( moveq mask, #0xfc000003 ) - THUMB( moveq mask, #0xfc000000 ) - THUMB( orreq mask, #0x03 ) - movne mask, #0 @ mask for 32-bit - -1: stmfd sp!, {pc} @ calculate offset of PC stored - ldr r0, [sp], #4 @ by stmfd for this CPU - adr r1, 1b - sub offset, r0, r1 - -/* - * Stack frame layout: - * optionally saved caller registers (r4 - r10) - * saved fp - * saved sp - * saved lr - * frame => saved pc - * optionally saved arguments (r0 - r3) - * saved sp => - * - * Functions start with the following code sequence: - * mov ip, sp - * stmfd sp!, {r0 - r3} (optional) - * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} - */ -for_each_frame: tst frame, mask @ Check for address exceptions - bne no_frame - -1001: ldr sv_pc, [frame, #0] @ get saved pc -1002: ldr sv_fp, [frame, #-12] @ get saved fp - - sub sv_pc, sv_pc, offset @ Correct PC for prefetching - bic sv_pc, sv_pc, mask @ mask PC/LR for the mode - -1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, - ldr r3, .Ldsi+4 @ adjust saved 'pc' back one - teq r3, r2, lsr #11 @ instruction - subne r0, sv_pc, #4 @ allow for mov - subeq r0, sv_pc, #8 @ allow for mov + stmia - - ldr r1, [frame, #-4] @ get saved lr - mov r2, frame - bic r1, r1, mask @ mask PC/LR for the mode - bl dump_backtrace_entry - - ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, - ldr r3, .Ldsi+4 - teq r3, r1, lsr #11 - ldreq r0, [frame, #-8] @ get sp - subeq r0, r0, #4 @ point at the last arg - bleq dump_backtrace_stm @ dump saved registers - -1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} - ldr r3, .Ldsi @ instruction exists, - teq r3, r1, lsr #11 - subeq r0, frame, #16 - bleq dump_backtrace_stm @ dump saved registers - - teq sv_fp, #0 @ zero saved fp means - beq no_frame @ no further frames - - cmp sv_fp, frame @ next frame must be - mov frame, sv_fp @ above the current frame - bhi for_each_frame - -1006: adr r0, .Lbad - mov r1, frame - bl printk -no_frame: ldmfd sp!, {r4 - r8, pc} -ENDPROC(c_backtrace) - - 
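For comparison with the clang variant, the classic layout this file walks can be modelled in C roughly as follows (illustrative only; real unwinding must validate every load, which the asm does via the __ex_table entries that follow):

#include <stdio.h>

/* Hypothetical helper type: fp points at the saved pc, so the tail of
 * a frame starts 12 bytes below it, matching the loads at 1001/1002
 * and [frame, #-4]/[frame, #-8]/[frame, #-12]. */
struct apcs_frame_tail {
    unsigned long fp;   /* [frame, #-12] caller's frame pointer */
    unsigned long sp;   /* [frame, #-8]                         */
    unsigned long lr;   /* [frame, #-4]                         */
    unsigned long pc;   /* [frame, #0] saved by stmfd {..., pc} */
};

static void walk(unsigned long fp)
{
    while (fp) {
        struct apcs_frame_tail *f = (struct apcs_frame_tail *)(fp - 12);
        printf("pc=%#lx lr=%#lx\n", f->pc, f->lr);
        if (f->fp <= fp)            /* frames must strictly ascend */
            break;
        fp = f->fp;
    }
}

int main(void)
{
    unsigned long stack[4];
    struct apcs_frame_tail *f = (struct apcs_frame_tail *)stack;
    f->fp = 0; f->sp = 0; f->lr = 0x8004; f->pc = 0x8010;  /* one frame */
    walk((unsigned long)&f->pc);
    return 0;
}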
.pushsection __ex_table,"a" - .align 3 - .long 1001b, 1006b - .long 1002b, 1006b - .long 1003b, 1006b - .long 1004b, 1006b - .popsection - -.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" - .align -.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} - .word 0xe92d0000 >> 11 @ stmfd sp!, {} - -#endif diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S deleted file mode 100644 index 591ba077e874de5f5fef18da11d0a5643d8c7ff1..0000000000000000000000000000000000000000 --- a/arch/arm/lib/bswapsdi2.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#if __LINUX_ARM_ARCH__ >= 6 -ENTRY(__bswapsi2) - rev r0, r0 - bx lr -ENDPROC(__bswapsi2) - -ENTRY(__bswapdi2) - rev r3, r0 - rev r0, r1 - mov r1, r3 - bx lr -ENDPROC(__bswapdi2) -#else -ENTRY(__bswapsi2) - eor r3, r0, r0, ror #16 - mov r3, r3, lsr #8 - bic r3, r3, #0xff00 - eor r0, r3, r0, ror #8 - ret lr -ENDPROC(__bswapsi2) - -ENTRY(__bswapdi2) - mov ip, r1 - eor r3, ip, ip, ror #16 - eor r1, r0, r0, ror #16 - mov r1, r1, lsr #8 - mov r3, r3, lsr #8 - bic r3, r3, #0xff00 - bic r1, r1, #0xff00 - eor r1, r1, r0, ror #8 - eor r0, r3, ip, ror #8 - ret lr -ENDPROC(__bswapdi2) -#endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S deleted file mode 100644 index 28b0341ae786fbd73aa5d4fe9a43e49bee3e910c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/call_with_stack.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/lib/call_with_stack.S - * - * Copyright (C) 2011 ARM Ltd. - * Written by Will Deacon - */ - -#include -#include - -/* - * void call_with_stack(void (*fn)(void *), void *arg, void *sp) - * - * Change the stack to that pointed at by sp, then invoke fn(arg) with - * the new stack. - */ -ENTRY(call_with_stack) - str sp, [r2, #-4]! - str lr, [r2, #-4]! 
- - mov sp, r2 - mov r2, r0 - mov r0, r1 - - badr lr, 1f - ret r2 - -1: ldr lr, [sp] - ldr sp, [sp, #4] - ret lr -ENDPROC(call_with_stack) diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S deleted file mode 100644 index 02424765e9e1cd8659c8886d5c97c191ad0880b7..0000000000000000000000000000000000000000 --- a/arch/arm/lib/changebit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/changebit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _change_bit, eor diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S deleted file mode 100644 index 8f2c4dbfc5f21009d6336c16f514140a226fe3eb..0000000000000000000000000000000000000000 --- a/arch/arm/lib/clear_user.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/clear_user.S - * - * Copyright (C) 1995, 1996,1997,1998 Russell King - */ -#include -#include -#include - - .text - -/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) - * Purpose : clear some user memory - * Params : addr - user memory address to clear - * : sz - number of bytes to clear - * Returns : number of bytes NOT cleared - */ -ENTRY(__clear_user_std) -WEAK(arm_clear_user) -UNWIND(.fnstart) -UNWIND(.save {r1, lr}) - stmfd sp!, {r1, lr} - mov r2, #0 - cmp r1, #4 - blt 2f - ands ip, r0, #3 - beq 1f - cmp ip, #2 - strusr r2, r0, 1 - strusr r2, r0, 1, le - strusr r2, r0, 1, lt - rsb ip, ip, #4 - sub r1, r1, ip @ 7 6 5 4 3 2 1 -1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7 - strusr r2, r0, 4, pl, rept=2 - bpl 1b - adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3 - strusr r2, r0, 4, pl -2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x - strusr r2, r0, 1, ne, rept=2 - tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 - it ne @ explicit IT needed for the label -USER( strbtne r2, [r0]) - mov r0, #0 - ldmfd sp!, {r1, pc} -UNWIND(.fnend) -ENDPROC(arm_clear_user) -ENDPROC(__clear_user_std) - - .pushsection .text.fixup,"ax" - .align 0 -9001: ldmfd sp!, {r0, pc} - .popsection - diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S deleted file mode 100644 index 4646dee8a3394121794ad9988cf737fa5ac99d3c..0000000000000000000000000000000000000000 --- a/arch/arm/lib/clearbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/clearbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _clear_bit, bic diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S deleted file mode 100644 index f8016e3db65d7f628327ed7600f24943c210ea7f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_from_user.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_from_user.S - * - * Author: Nicolas Pitre - * Created: Sep 29, 2005 - * Copyright: MontaVista Software, Inc. - */ - -#include -#include -#include - -/* - * Prototype: - * - * size_t arm_copy_from_user(void *to, const void *from, size_t n) - * - * Purpose: - * - * copy a block to kernel memory from user memory - * - * Params: - * - * to = kernel memory - * from = user memory - * n = number of bytes to copy - * - * Return value: - * - * Number of bytes NOT copied. 
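Callers never see the fault directly; they test the residue, as in this hedged sketch of the usual wrapper logic (read_sample and the zeroing policy are illustrative, mirroring copy_from_user(), not the exact in-tree wrappers):

#include <string.h>

extern unsigned long arm_copy_from_user(void *to, const void *from,
                                        unsigned long n);

/* A non-zero return is the byte count left uncopied; zero the tail
 * and report -EFAULT. */
static long read_sample(void *dst, const void *user_src, unsigned long n)
{
    unsigned long left = arm_copy_from_user(dst, user_src, n);
    if (left) {
        memset((char *)dst + (n - left), 0, left);
        return -14;                /* -EFAULT */
    }
    return 0;
}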
- */ - -#ifdef CONFIG_CPU_USE_DOMAINS - -#ifndef CONFIG_THUMB2_KERNEL -#define LDR1W_SHIFT 0 -#else -#define LDR1W_SHIFT 1 -#endif - - .macro ldr1w ptr reg abort - ldrusr \reg, \ptr, 4, abort=\abort - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldr1w \ptr, \reg1, \abort - ldr1w \ptr, \reg2, \abort - ldr1w \ptr, \reg3, \abort - ldr1w \ptr, \reg4, \abort - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort - ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort - .endm - -#else - -#define LDR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - USERL(\abort, W(ldr) \reg, [\ptr], #4) - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}) - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) - .endm - -#endif /* CONFIG_CPU_USE_DOMAINS */ - - .macro ldr1b ptr reg cond=al abort - ldrusr \reg, \ptr, 1, \cond, abort=\abort - .endm - -#define STR1W_SHIFT 0 - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - strb\cond \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - mov r3, #0 - stmdb sp!, {r0, r2, r3, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - add sp, sp, #8 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -ENTRY(arm_copy_from_user) -#ifdef CONFIG_CPU_SPECTRE - get_thread_info r3 - ldr r3, [r3, #TI_ADDR_LIMIT] - uaccess_mask_range_ptr r1, r2, r3, ip -#endif - -#include "copy_template.S" - -ENDPROC(arm_copy_from_user) - - .pushsection .text.fixup,"ax" - .align 0 - copy_abort_preamble - ldmfd sp!, {r1, r2, r3} - sub r0, r0, r1 - rsb r0, r0, r2 - copy_abort_end - .popsection - diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S deleted file mode 100644 index 5db1a8ee3d9fb94416e2dae5e744de507f2659d9..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_page.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copypage.S - * - * Copyright (C) 1995-1999 Russell King - * - * ASM optimised string functions - */ -#include -#include -#include -#include - -#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 )) - - .text - .align 5 -/* - * StrongARM optimised copy_page routine - * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s) - * Note that we probably achieve closer to the 100MB/s target with - * the core clock switching. 
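The structure of the unrolled loop that follows, modelled in C (the constants are assumptions for a 4 KiB page and a 32-byte StrongARM cache line; the real values come from the page and cache headers, and the PLD variant runs one iteration short to overlap prefetch):

#include <string.h>

#define PAGE_SZ        4096  /* assumption: 4 KiB pages */
#define L1_CACHE_BYTES 32    /* assumption: StrongARM 32-byte lines */

/* The page moves in 16-byte {r3, r4, ip, lr} bursts, two cache lines
 * per outer iteration (COPY_COUNT, ignoring the PLD adjustment). */
static void copy_page_model(void *to, const void *from)
{
    const char *s = (const char *)from;
    char *d = (char *)to;
    int count = PAGE_SZ / (2 * L1_CACHE_BYTES);

    while (count--) {
        int i;
        for (i = 0; i < 2 * L1_CACHE_BYTES / 16; i++) {
            memcpy(d, s, 16);      /* one ldmia/stmia burst */
            d += 16;
            s += 16;
        }
    }
}

int main(void)
{
    static char src[PAGE_SZ] = { 1 }, dst[PAGE_SZ];
    copy_page_model(dst, src);
    return dst[0] == 1 ? 0 : 1;
}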
- */ -ENTRY(copy_page) - stmfd sp!, {r4, lr} @ 2 - PLD( pld [r1, #0] ) - PLD( pld [r1, #L1_CACHE_BYTES] ) - mov r2, #COPY_COUNT @ 1 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: PLD( pld [r1, #2 * L1_CACHE_BYTES]) - PLD( pld [r1, #3 * L1_CACHE_BYTES]) -2: - .rept (2 * L1_CACHE_BYTES / 16 - 1) - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - .endr - subs r2, r2, #1 @ 1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmiagt r1!, {r3, r4, ip, lr} @ 4 - bgt 1b @ 1 - PLD( ldmiaeq r1!, {r3, r4, ip, lr} ) - PLD( beq 2b ) - ldmfd sp!, {r4, pc} @ 3 -ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S deleted file mode 100644 index 810a805d36dce8f78e879ae38d392be16d52bebc..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_template.S +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_template.s - * - * Code template for optimized memory copy functions - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. - */ - -/* - * Theory of operation - * ------------------- - * - * This file provides the core code for a forward memory copy used in - * the implementation of memcpy(), copy_to_user() and copy_from_user(). - * - * The including file must define the following accessor macros - * according to the needs of the given function: - * - * ldr1w ptr reg abort - * - * This loads one word from 'ptr', stores it in 'reg' and increments - * 'ptr' to the next word. The 'abort' argument is used for fixup tables. - * - * ldr4w ptr reg1 reg2 reg3 reg4 abort - * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * - * This loads four or eight words starting from 'ptr', stores them - * in provided registers and increments 'ptr' past those words. - * The 'abort' argument is used for fixup tables. - * - * ldr1b ptr reg cond abort - * - * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. - * It also must apply the condition code if provided, otherwise the - * "al" condition is assumed by default. - * - * str1w ptr reg abort - * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - * str1b ptr reg cond abort - * - * Same as their ldr* counterparts, but data is stored to 'ptr' location - * rather than being loaded. - * - * enter reg1 reg2 - * - * Preserve the provided registers on the stack plus any additional - * data as needed by the implementation including this code. Called - * upon code entry. - * - * usave reg1 reg2 - * - * Unwind annotation macro corresponding to the 'enter' macro. - * It tells the unwinder that a prior 'enter' macro preserved some - * provided registers on the stack, plus additional data. - * - * exit reg1 reg2 - * - * Restore registers with the values previously saved with the - * 'enter' macro. Called upon code termination.
- * - * LDR1W_SHIFT - * STR1W_SHIFT - * - * Correction to be applied to the "ip" register when branching into - * the ldr1w or str1w instructions (some of these macros may expand to - * more than one 32-bit instruction in Thumb-2) - */ - - - UNWIND( .fnstart ) - enter r4, lr - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr @ in first stmdb block - - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #0] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr - UNWIND( .save {r5 - r8} ) @ in second stmfd block - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb r3, ip, #32 ) - CALGN( sbcsne r4, r3, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, r3 ) @ C gets set - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 4f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -3: PLD( pld [r1, #124] ) -4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - subs r2, r2, #32 - str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 -#if LDR1W_SHIFT > 0 - lsl ip, ip, #LDR1W_SHIFT -#endif - addne pc, pc, ip @ C is always clear here - b 7f -6: - .rept (1 << LDR1W_SHIFT) - W(nop) - .endr - ldr1w r1, r3, abort=20f - ldr1w r1, r4, abort=20f - ldr1w r1, r5, abort=20f - ldr1w r1, r6, abort=20f - ldr1w r1, r7, abort=20f - ldr1w r1, r8, abort=20f - ldr1w r1, lr, abort=20f - -#if LDR1W_SHIFT < STR1W_SHIFT - lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT -#elif LDR1W_SHIFT > STR1W_SHIFT - lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT -#endif - add pc, pc, ip - nop - .rept (1 << STR1W_SHIFT) - W(nop) - .endr - str1w r0, r3, abort=20f - str1w r0, r4, abort=20f - str1w r0, r5, abort=20f - str1w r0, r6, abort=20f - str1w r0, r7, abort=20f - str1w r0, r8, abort=20f - str1w r0, lr, abort=20f - - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - UNWIND( .fnend ) @ end of second stmfd block - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block -8: movs r2, r2, lsl #31 - ldr1b r1, r3, ne, abort=21f - ldr1b r1, r4, cs, abort=21f - ldr1b r1, ip, cs, abort=21f - str1b r0, r3, ne, abort=21f - str1b r0, r4, cs, abort=21f - str1b r0, ip, cs, abort=21f - - exit r4, pc - -9: rsb ip, ip, #4 - cmp ip, #2 - ldr1b r1, r3, gt, abort=21f - ldr1b r1, r4, ge, abort=21f - ldr1b r1, lr, abort=21f - str1b r0, r3, gt, abort=21f - str1b r0, r4, ge, abort=21f - subs r2, r2, ip - str1b r0, lr, abort=21f - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr1w r1, lr, abort=21f - beq 17f - bgt 18f - UNWIND( .fnend ) - - - .macro forward_copy_shift pull push - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( rsb ip, ip, #32 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - usave r4, lr - UNWIND( .save {r5 - r9} ) @ in new second stmfd block - PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #28] ) - PLD( blt 13f ) - PLD( pld [r1, #60] ) - PLD( pld [r1, #92] ) - -12: PLD( pld [r1, #124] ) -13: ldr4w r1, r4, r5, r6, r7, abort=19f - mov r3, lr, lspull #\pull - subs r2, r2, #32 - ldr4w r1, r8, r9, ip, lr, abort=19f - orr r3, r3, r4, lspush #\push - mov r4, r4, lspull #\pull - orr r4, r4, r5, lspush #\push - mov r5, r5, lspull #\pull - orr
r5, r5, r6, lspush #\push - mov r6, r6, lspull #\pull - orr r6, r6, r7, lspush #\push - mov r7, r7, lspull #\pull - orr r7, r7, r8, lspush #\push - mov r8, r8, lspull #\pull - orr r8, r8, r9, lspush #\push - mov r9, r9, lspull #\pull - orr r9, r9, ip, lspush #\push - mov ip, ip, lspull #\pull - orr ip, ip, lr, lspush #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - UNWIND( .fnend ) @ end of the second stmfd block - - UNWIND( .fnstart ) - usave r4, lr @ still in first stmdb block -14: ands ip, r2, #28 - beq 16f - -15: mov r3, lr, lspull #\pull - ldr1w r1, lr, abort=21f - subs ip, ip, #4 - orr r3, r3, lr, lspush #\push - str1w r0, r3, abort=21f - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: sub r1, r1, #(\push / 8) - b 8b - UNWIND( .fnend ) - - .endm - - - forward_copy_shift pull=8 push=24 - -17: forward_copy_shift pull=16 push=16 - -18: forward_copy_shift pull=24 push=8 - - -/* - * Abort preamble and completion macros. - * If a fixup handler is required then those macros must surround it. - * It is assumed that the fixup code will handle the private part of - * the exit macro. - */ - - .macro copy_abort_preamble -19: ldmfd sp!, {r5 - r9} - b 21f -20: ldmfd sp!, {r5 - r8} -21: - .endm - - .macro copy_abort_end - ldmfd sp!, {r4, pc} - .endm - diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S deleted file mode 100644 index ebfe4cb3d9125056a5abcd867d6342d44ba0d3a7..0000000000000000000000000000000000000000 --- a/arch/arm/lib/copy_to_user.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/copy_to_user.S - * - * Author: Nicolas Pitre - * Created: Sep 29, 2005 - * Copyright: MontaVista Software, Inc. - */ - -#include -#include -#include - -/* - * Prototype: - * - * size_t arm_copy_to_user(void *to, const void *from, size_t n) - * - * Purpose: - * - * copy a block to user memory from kernel memory - * - * Params: - * - * to = user memory - * from = kernel memory - * n = number of bytes to copy - * - * Return value: - * - * Number of bytes NOT copied. 
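Both user-copy flavours share the same fault fixup arithmetic; in C (a model of the .text.fixup block, with names invented for clarity):

#include <stdio.h>

/* The 'enter' macro saved the original dst (r0) and count (r2); on
 * abort the handler reloads them and converts the dst cursor into the
 * "bytes NOT copied" return value. */
static unsigned long bytes_not_copied(unsigned long dst_start,
                                      unsigned long dst_cursor,
                                      unsigned long count)
{
    unsigned long copied = dst_cursor - dst_start; /* sub r0, r0, r1 */
    return count - copied;                         /* rsb r0, r0, r2 */
}

int main(void)
{
    /* faulted after 96 of 256 bytes: 160 left */
    printf("%lu\n", bytes_not_copied(0x1000, 0x1060, 256));
    return 0;
}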
- */ - -#define LDR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldrb\cond \reg, [\ptr], #1 - .endm - -#ifdef CONFIG_CPU_USE_DOMAINS - -#ifndef CONFIG_THUMB2_KERNEL -#define STR1W_SHIFT 0 -#else -#define STR1W_SHIFT 1 -#endif - - .macro str1w ptr reg abort - strusr \reg, \ptr, 4, abort=\abort - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - str1w \ptr, \reg1, \abort - str1w \ptr, \reg2, \abort - str1w \ptr, \reg3, \abort - str1w \ptr, \reg4, \abort - str1w \ptr, \reg5, \abort - str1w \ptr, \reg6, \abort - str1w \ptr, \reg7, \abort - str1w \ptr, \reg8, \abort - .endm - -#else - -#define STR1W_SHIFT 0 - - .macro str1w ptr reg abort - USERL(\abort, W(str) \reg, [\ptr], #4) - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}) - .endm - -#endif /* CONFIG_CPU_USE_DOMAINS */ - - .macro str1b ptr reg cond=al abort - strusr \reg, \ptr, 1, \cond, abort=\abort - .endm - - .macro enter reg1 reg2 - mov r3, #0 - stmdb sp!, {r0, r2, r3, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, r2, r3, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - add sp, sp, #8 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -ENTRY(__copy_to_user_std) -WEAK(arm_copy_to_user) -#ifdef CONFIG_CPU_SPECTRE - get_thread_info r3 - ldr r3, [r3, #TI_ADDR_LIMIT] - uaccess_mask_range_ptr r0, r2, r3, ip -#endif - -#include "copy_template.S" - -ENDPROC(arm_copy_to_user) -ENDPROC(__copy_to_user_std) - - .pushsection .text.fixup,"ax" - .align 0 - copy_abort_preamble - ldmfd sp!, {r1, r2, r3} - sub r0, r0, r1 - rsb r0, r0, r2 - copy_abort_end - .popsection diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S deleted file mode 100644 index 3559d515144c2243844527bf6e83b616cc25a7b4..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumipv6.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumipv6.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -ENTRY(__csum_ipv6_magic) - str lr, [sp, #-4]! 
- adds ip, r2, r3 - ldmia r1, {r1 - r3, lr} - adcs ip, ip, r1 - adcs ip, ip, r2 - adcs ip, ip, r3 - adcs ip, ip, lr - ldmia r0, {r0 - r3} - adcs r0, ip, r0 - adcs r0, r0, r1 - adcs r0, r0, r2 - ldr r2, [sp, #4] - adcs r0, r0, r3 - adcs r0, r0, r2 - adcs r0, r0, #0 - ldmfd sp!, {pc} -ENDPROC(__csum_ipv6_magic) - diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S deleted file mode 100644 index 87c9471be8b65aa527c5805369430b2e49cee961..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartial.S +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartial.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -/* - * Function: __u32 csum_partial(const char *src, int len, __u32 sum) - * Params : r0 = buffer, r1 = len, r2 = checksum - * Returns : r0 = new checksum - */ - -buf .req r0 -len .req r1 -sum .req r2 -td0 .req r3 -td1 .req r4 @ save before use -td2 .req r5 @ save before use -td3 .req lr - -.Lzero: mov r0, sum - add sp, sp, #4 - ldr pc, [sp], #4 - - /* - * Handle 0 to 7 bytes, with any alignment of source and - * destination pointers. Note that when we get here, C = 0 - */ -.Lless8: teq len, #0 @ check for zero count - beq .Lzero - - /* we must have at least one byte. */ - tst buf, #1 @ odd address? - movne sum, sum, ror #8 - ldrbne td0, [buf], #1 - subne len, len, #1 - adcsne sum, sum, td0, put_byte_1 - -.Lless4: tst len, #6 - beq .Lless8_byte - - /* we are now half-word aligned */ - -.Lless8_wordlp: -#if __LINUX_ARM_ARCH__ >= 4 - ldrh td0, [buf], #2 - sub len, len, #2 -#else - ldrb td0, [buf], #1 - ldrb td3, [buf], #1 - sub len, len, #2 -#ifndef __ARMEB__ - orr td0, td0, td3, lsl #8 -#else - orr td0, td3, td0, lsl #8 -#endif -#endif - adcs sum, sum, td0 - tst len, #6 - bne .Lless8_wordlp - -.Lless8_byte: tst len, #1 @ odd number of bytes - ldrbne td0, [buf], #1 @ include last byte - adcsne sum, sum, td0, put_byte_0 @ update checksum - -.Ldone: adc r0, sum, #0 @ collect up the last carry - ldr td0, [sp], #4 - tst td0, #1 @ check buffer alignment - movne r0, r0, ror #8 @ rotate checksum by 8 bits - ldr pc, [sp], #4 @ return - -.Lnot_aligned: tst buf, #1 @ odd address - ldrbne td0, [buf], #1 @ make even - subne len, len, #1 - adcsne sum, sum, td0, put_byte_1 @ update checksum - - tst buf, #2 @ 32-bit aligned? -#if __LINUX_ARM_ARCH__ >= 4 - ldrhne td0, [buf], #2 @ make 32-bit aligned - subne len, len, #2 -#else - ldrbne td0, [buf], #1 - ldrbne ip, [buf], #1 - subne len, len, #2 -#ifndef __ARMEB__ - orrne td0, td0, ip, lsl #8 -#else - orrne td0, ip, td0, lsl #8 -#endif -#endif - adcsne sum, sum, td0 @ update checksum - ret lr - -ENTRY(csum_partial) - stmfd sp!, {buf, lr} - cmp len, #8 @ Ensure that we have at least - blo .Lless8 @ 8 bytes to copy. 
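What the adcs chains accumulate is the standard Internet ones'-complement sum; a C model, equivalent up to the final 16-bit fold (the asm handles alignment rotation separately, as shown below):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* 32-bit ones'-complement partial sum with end-around carry. */
static uint32_t csum_partial_model(const uint8_t *buf, size_t len,
                                   uint32_t sum)
{
    uint64_t acc = sum;
    while (len > 1) {
        acc += (uint32_t)buf[0] | ((uint32_t)buf[1] << 8); /* LE halfword */
        buf += 2;
        len -= 2;
    }
    if (len)
        acc += buf[0];             /* trailing odd byte */
    while (acc >> 32)              /* fold carries back in */
        acc = (acc & 0xffffffffu) + (acc >> 32);
    return (uint32_t)acc;
}

int main(void)
{
    uint8_t pkt[4] = { 0x45, 0x00, 0x00, 0x54 };
    printf("%#x\n", csum_partial_model(pkt, sizeof(pkt), 0)); /* 0x5445 */
    return 0;
}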
- - tst buf, #1 - movne sum, sum, ror #8 - - adds sum, sum, #0 @ C = 0 - tst buf, #3 @ Test destination alignment - blne .Lnot_aligned @ align destination, return here - -1: bics ip, len, #31 - beq 3f - - stmfd sp!, {r4 - r5} -2: ldmia buf!, {td0, td1, td2, td3} - adcs sum, sum, td0 - adcs sum, sum, td1 - adcs sum, sum, td2 - adcs sum, sum, td3 - ldmia buf!, {td0, td1, td2, td3} - adcs sum, sum, td0 - adcs sum, sum, td1 - adcs sum, sum, td2 - adcs sum, sum, td3 - sub ip, ip, #32 - teq ip, #0 - bne 2b - ldmfd sp!, {r4 - r5} - -3: tst len, #0x1c @ should not change C - beq .Lless4 - -4: ldr td0, [buf], #4 - sub len, len, #4 - adcs sum, sum, td0 - tst len, #0x1c - bne 4b - b .Lless4 -ENDPROC(csum_partial) diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S deleted file mode 100644 index 184d97254a7a2de14cdf83e312383c6fe3071b8f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopy.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopy.S - * - * Copyright (C) 1995-1998 Russell King - */ -#include -#include - - .text - -/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum) - * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum - * Returns : r0 = new checksum - */ - - .macro save_regs - stmfd sp!, {r1, r4 - r8, lr} - .endm - - .macro load_regs - ldmfd sp!, {r1, r4 - r8, pc} - .endm - - .macro load1b, reg1 - ldrb \reg1, [r0], #1 - .endm - - .macro load2b, reg1, reg2 - ldrb \reg1, [r0], #1 - ldrb \reg2, [r0], #1 - .endm - - .macro load1l, reg1 - ldr \reg1, [r0], #4 - .endm - - .macro load2l, reg1, reg2 - ldr \reg1, [r0], #4 - ldr \reg2, [r0], #4 - .endm - - .macro load4l, reg1, reg2, reg3, reg4 - ldmia r0!, {\reg1, \reg2, \reg3, \reg4} - .endm - -#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) -#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) - -#include "csumpartialcopygeneric.S" diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S deleted file mode 100644 index 0b706a39a6770296034c4421a7c7cf4f483949ba..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopygeneric.S - * - * Copyright (C) 1995-2001 Russell King - */ -#include - -/* - * unsigned int - * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) - * r0 = src, r1 = dst, r2 = len, r3 = sum - * Returns : r0 = checksum - * - * Note that 'tst' and 'teq' preserve the carry flag. - */ - -src .req r0 -dst .req r1 -len .req r2 -sum .req r3 - -.Lzero: mov r0, sum - load_regs - - /* - * Align an unaligned destination pointer. We know that - * we have >= 8 bytes here, so we don't need to check - * the length. Note that the source pointer hasn't been - * aligned yet. - */ -.Ldst_unaligned: - tst dst, #1 - beq .Ldst_16bit - - load1b ip - sub len, len, #1 - adcs sum, sum, ip, put_byte_1 @ update checksum - strb ip, [dst], #1 - tst dst, #2 - reteq lr @ dst is now 32bit aligned - -.Ldst_16bit: load2b r8, ip - sub len, len, #2 - adcs sum, sum, r8, put_byte_0 - strb r8, [dst], #1 - adcs sum, sum, ip, put_byte_1 - strb ip, [dst], #1 - ret lr @ dst is now 32bit aligned - - /* - * Handle 0 to 7 bytes, with any alignment of source and - * destination pointers. Note that when we get here, C = 0 - */ -.Lless8: teq len, #0 @ check for zero count - beq .Lzero - - /* we must have at least one byte. 
*/ - tst dst, #1 @ dst 16-bit aligned - beq .Lless8_aligned - - /* Align dst */ - load1b ip - sub len, len, #1 - adcs sum, sum, ip, put_byte_1 @ update checksum - strb ip, [dst], #1 - tst len, #6 - beq .Lless8_byteonly - -1: load2b r8, ip - sub len, len, #2 - adcs sum, sum, r8, put_byte_0 - strb r8, [dst], #1 - adcs sum, sum, ip, put_byte_1 - strb ip, [dst], #1 -.Lless8_aligned: - tst len, #6 - bne 1b -.Lless8_byteonly: - tst len, #1 - beq .Ldone - load1b r8 - adcs sum, sum, r8, put_byte_0 @ update checksum - strb r8, [dst], #1 - b .Ldone - -FN_ENTRY - save_regs - - cmp len, #8 @ Ensure that we have at least - blo .Lless8 @ 8 bytes to copy. - - adds sum, sum, #0 @ C = 0 - tst dst, #3 @ Test destination alignment - blne .Ldst_unaligned @ align destination, return here - - /* - * Ok, the dst pointer is now 32bit aligned, and we know - * that we must have more than 4 bytes to copy. Note - * that C contains the carry from the dst alignment above. - */ - - tst src, #3 @ Test source alignment - bne .Lsrc_not_aligned - - /* Routine for src & dst aligned */ - - bics ip, len, #15 - beq 2f - -1: load4l r4, r5, r6, r7 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - sub ip, ip, #16 - teq ip, #0 - bne 1b - -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r4, r5 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - tst ip, #4 - beq 4f - -3: load1l r4 - str r4, [dst], #4 - adcs sum, sum, r4 - -4: ands len, len, #3 - beq .Ldone - load1l r4 - tst len, #2 - mov r5, r4, get_byte_0 - beq .Lexit - adcs sum, sum, r4, lspush #16 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - mov r5, r4, get_byte_2 -.Lexit: tst len, #1 - strbne r5, [dst], #1 - andne r5, r5, #255 - adcsne sum, sum, r5, put_byte_0 - - /* - * If the dst pointer was not 16-bit aligned, we - * need to rotate the checksum here to get around - * the inefficient byte manipulations in the - * architecture independent code. 
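This works because the ones'-complement sum commutes with the byte swap an odd start address introduces; the fix-up is just a rotation (hedged sketch):

#include <stdint.h>

/* n must be 1..31 */
static uint32_t ror32(uint32_t x, unsigned int n)
{
    return (x >> n) | (x << (32 - n));
}

/* Model of the .Ldone fix-up: if dst started on an odd address the
 * bytes land swapped within each halfword, so rotating the 32-bit sum
 * by 8 bits repairs it. */
static uint32_t fixup_odd_dst(uint32_t sum, unsigned long dst_start)
{
    return (dst_start & 1) ? ror32(sum, 8) : sum;
}

int main(void)
{
    return fixup_odd_dst(0x00345678u, 1) == 0x78003456u ? 0 : 1;
}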
- */ -.Ldone: adc r0, sum, #0 - ldr sum, [sp, #0] @ dst - tst sum, #1 - movne r0, r0, ror #8 - load_regs - -.Lsrc_not_aligned: - adc sum, sum, #0 @ include C from dst alignment - and ip, src, #3 - bic src, src, #3 - load1l r5 - cmp ip, #2 - beq .Lsrc2_aligned - bhi .Lsrc3_aligned - mov r4, r5, lspull #8 @ C = 0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - mov r6, r6, lspull #8 - orr r6, r6, r7, lspush #24 - mov r7, r7, lspull #8 - orr r7, r7, r8, lspush #24 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #8 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #8 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #24 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #8 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - adcs sum, sum, r4, lspush #16 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - mov r5, r4, get_byte_2 - b .Lexit - -.Lsrc2_aligned: mov r4, r5, lspull #16 - adds sum, sum, #0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - mov r6, r6, lspull #16 - orr r6, r6, r7, lspush #16 - mov r7, r7, lspull #16 - orr r7, r7, r8, lspush #16 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #16 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #16 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #16 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #16 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - adcs sum, sum, r4 - strb r5, [dst], #1 - mov r5, r4, get_byte_1 - strb r5, [dst], #1 - tst len, #1 - beq .Ldone - load1b r5 - b .Lexit - -.Lsrc3_aligned: mov r4, r5, lspull #24 - adds sum, sum, #0 - bics ip, len, #15 - beq 2f -1: load4l r5, r6, r7, r8 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - mov r6, r6, lspull #24 - orr r6, r6, r7, lspush #8 - mov r7, r7, lspull #24 - orr r7, r7, r8, lspush #8 - stmia dst!, {r4, r5, r6, r7} - adcs sum, sum, r4 - adcs sum, sum, r5 - adcs sum, sum, r6 - adcs sum, sum, r7 - mov r4, r8, lspull #24 - sub ip, ip, #16 - teq ip, #0 - bne 1b -2: ands ip, len, #12 - beq 4f - tst ip, #8 - beq 3f - load2l r5, r6 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - stmia dst!, {r4, r5} - adcs sum, sum, r4 - adcs sum, sum, r5 - mov r4, r6, lspull #24 - tst ip, #4 - beq 4f -3: load1l r5 - orr r4, r4, r5, lspush #8 - str r4, [dst], #4 - adcs sum, sum, r4 - mov r4, r5, lspull #24 -4: ands len, len, #3 - beq .Ldone - mov r5, r4, get_byte_0 - tst len, #2 - beq .Lexit - strb r5, [dst], #1 - adcs sum, sum, r4 - load1l r4 - mov r5, r4, get_byte_0 - strb r5, [dst], #1 - adcs sum, sum, r4, lspush #24 - mov r5, r4, get_byte_1 - b .Lexit -FN_EXIT diff --git 
a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S deleted file mode 100644 index 6bd3a93eaa3c15202fc5b0f45d3ca4e5b52be054..0000000000000000000000000000000000000000 --- a/arch/arm/lib/csumpartialcopyuser.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/csumpartialcopyuser.S - * - * Copyright (C) 1995-1998 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include -#include - - .text - -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - .macro save_regs - mrc p15, 0, ip, c3, c0, 0 - stmfd sp!, {r1, r2, r4 - r8, ip, lr} - uaccess_enable ip - .endm - - .macro load_regs - ldmfd sp!, {r1, r2, r4 - r8, ip, lr} - mcr p15, 0, ip, c3, c0, 0 - ret lr - .endm -#else - .macro save_regs - stmfd sp!, {r1, r2, r4 - r8, lr} - .endm - - .macro load_regs - ldmfd sp!, {r1, r2, r4 - r8, pc} - .endm -#endif - - .macro load1b, reg1 - ldrusr \reg1, r0, 1 - .endm - - .macro load2b, reg1, reg2 - ldrusr \reg1, r0, 1 - ldrusr \reg2, r0, 1 - .endm - - .macro load1l, reg1 - ldrusr \reg1, r0, 4 - .endm - - .macro load2l, reg1, reg2 - ldrusr \reg1, r0, 4 - ldrusr \reg2, r0, 4 - .endm - - .macro load4l, reg1, reg2, reg3, reg4 - ldrusr \reg1, r0, 4 - ldrusr \reg2, r0, 4 - ldrusr \reg3, r0, 4 - ldrusr \reg4, r0, 4 - .endm - -/* - * unsigned int - * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr) - * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr - * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT - */ - -#define FN_ENTRY ENTRY(csum_partial_copy_from_user) -#define FN_EXIT ENDPROC(csum_partial_copy_from_user) - -#include "csumpartialcopygeneric.S" - -/* - * FIXME: minor buglet here - * We don't return the checksum for the data present in the buffer. To do - * so properly, we would have to add in whatever registers were loaded before - * the fault, which, with the current asm above is not predictable. - */ - .pushsection .text.fixup,"ax" - .align 4 -9001: mov r4, #-EFAULT -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r5, [sp, #9*4] @ *err_ptr -#else - ldr r5, [sp, #8*4] @ *err_ptr -#endif - str r4, [r5] - ldmia sp, {r1, r2} @ retrieve dst, len - add r2, r2, r1 - mov r0, #0 @ zero the buffer -9002: teq r2, r1 - strbne r0, [r1], #1 - bne 9002b - load_regs - .popsection diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S deleted file mode 100644 index 3ccade0f813038c89404a26f92b4a35a3638dd86..0000000000000000000000000000000000000000 --- a/arch/arm/lib/delay-loop.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - */ -#include -#include -#include - - .text - -.LC0: .word loops_per_jiffy -.LC1: .word UDELAY_MULT - -/* - * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * r0 <= 2000 - * HZ <= 1000 - */ - -ENTRY(__loop_udelay) - ldr r2, .LC1 - mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT -ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0 - ldr r2, .LC0 - ldr r2, [r2] - umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy - adds r1, r1, #0xffffffff @ rounding up ... 
- adcs r0, r0, r0 @ and right shift by 31 - reteq lr - - .align 3 - -@ Delay routine -ENTRY(__loop_delay) - subs r0, r0, #1 -#if 0 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 - retls lr - subs r0, r0, #1 -#endif - bhi __loop_delay - ret lr -ENDPROC(__loop_udelay) -ENDPROC(__loop_const_udelay) -ENDPROC(__loop_delay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S deleted file mode 100644 index a87c02925ffae039687167b26345d8e0b490179f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/div64.S +++ /dev/null @@ -1,209 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/div64.S - * - * Optimized computation of 64-bit dividend / 32-bit divisor - * - * Author: Nicolas Pitre - * Created: Oct 5, 2003 - * Copyright: Monta Vista Software, Inc. - */ - -#include -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -/* - * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. - * - * Note: Calling convention is totally non standard for optimal code. - * This is meant to be used by do_div() from include/asm/div64.h only. - * - * Input parameters: - * xh-xl = dividend (clobbered) - * r4 = divisor (preserved) - * - * Output values: - * yh-yl = result - * xh = remainder - * - * Clobbered regs: xl, ip - */ - -ENTRY(__do_div64) -UNWIND(.fnstart) - - @ Test for easy paths first. - subs ip, r4, #1 - bls 9f @ divisor is 0 or 1 - tst ip, r4 - beq 8f @ divisor is power of 2 - - @ See if we need to handle upper 32-bit result. - cmp xh, r4 - mov yh, #0 - blo 3f - - @ Align divisor with upper part of dividend. - @ The aligned divisor is stored in yl preserving the original. - @ The bit position is stored in ip. - -#if __LINUX_ARM_ARCH__ >= 5 - - clz yl, r4 - clz ip, xh - sub yl, yl, ip - mov ip, #1 - mov ip, ip, lsl yl - mov yl, r4, lsl yl - -#else - - mov yl, r4 - mov ip, #1 -1: cmp yl, #0x80000000 - cmpcc yl, xh - movcc yl, yl, lsl #1 - movcc ip, ip, lsl #1 - bcc 1b - -#endif - - @ The division loop for needed upper bit positions. - @ Break out early if dividend reaches 0. -2: cmp xh, yl - orrcs yh, yh, ip - subscs xh, xh, yl - movsne ip, ip, lsr #1 - mov yl, yl, lsr #1 - bne 2b - - @ See if we need to handle lower 32-bit result. -3: cmp xh, #0 - mov yl, #0 - cmpeq xl, r4 - movlo xh, xl - retlo lr - - @ The division loop for lower bit positions. - @ Here we shift remainder bits leftwards rather than moving the - @ divisor for comparisons, considering the carry-out bit as well. - mov ip, #0x80000000 -4: movs xl, xl, lsl #1 - adcs xh, xh, xh - beq 6f - cmpcc xh, r4 -5: orrcs yl, yl, ip - subcs xh, xh, r4 - movs ip, ip, lsr #1 - bne 4b - ret lr - - @ The top part of remainder became zero. If carry is set - @ (the 33rd bit) this is a false positive so resume the loop. - @ Otherwise, if lower part is also null then we are done. -6: bcs 5b - cmp xl, #0 - reteq lr - - @ We still have remainder bits in the low part. Bring them up. - -#if __LINUX_ARM_ARCH__ >= 5 - - clz xh, xl @ we know xh is zero here so... - add xh, xh, #1 - mov xl, xl, lsl xh - mov ip, ip, lsr xh - -#else - -7: movs xl, xl, lsl #1 - mov ip, ip, lsr #1 - bcc 7b - -#endif - - @ Current remainder is now 1. It is pointless to compare with the - @ divisor at this point since the divisor cannot be smaller than 3 here.
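The register-level contract above is only ever exercised through the do_div() macro; its C-level behaviour is this (a plain-C model, not the optimized implementation):

#include <stdint.h>
#include <stdio.h>

/* do_div(): the 64-bit dividend is updated in place (yh-yl on exit)
 * and the 32-bit remainder is returned (xh on exit). */
static uint32_t do_div_model(uint64_t *n, uint32_t base)
{
    uint32_t rem = (uint32_t)(*n % base);
    *n /= base;
    return rem;
}

int main(void)
{
    uint64_t ns = 1000000123ULL;
    uint32_t rem = do_div_model(&ns, 1000000000u);
    printf("sec=%llu nsec=%u\n", (unsigned long long)ns, rem); /* 1, 123 */
    return 0;
}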
- @ If possible, branch for another shift in the division loop. - @ If no bit position left then we are done. - movs ip, ip, lsr #1 - mov xh, #1 - bne 4b - ret lr - -8: @ Division by a power of 2: determine what that divisor order is - @ then simply shift values around - -#if __LINUX_ARM_ARCH__ >= 5 - - clz ip, r4 - rsb ip, ip, #31 - -#else - - mov yl, r4 - cmp r4, #(1 << 16) - mov ip, #0 - movhs yl, yl, lsr #16 - movhs ip, #16 - - cmp yl, #(1 << 8) - movhs yl, yl, lsr #8 - addhs ip, ip, #8 - - cmp yl, #(1 << 4) - movhs yl, yl, lsr #4 - addhs ip, ip, #4 - - cmp yl, #(1 << 2) - addhi ip, ip, #3 - addls ip, ip, yl, lsr #1 - -#endif - - mov yh, xh, lsr ip - mov yl, xl, lsr ip - rsb ip, ip, #32 - ARM( orr yl, yl, xh, lsl ip ) - THUMB( lsl xh, xh, ip ) - THUMB( orr yl, yl, xh ) - mov xh, xl, lsl ip - mov xh, xh, lsr ip - ret lr - - @ eq -> division by 1: obvious enough... -9: moveq yl, xl - moveq yh, xh - moveq xh, #0 - reteq lr -UNWIND(.fnend) - -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) -Ldiv0_64: - @ Division by 0: - str lr, [sp, #-8]! - bl __div0 - - @ as wrong as it could be... - mov yl, #0 - mov yh, #0 - mov xh, #0 - ldr pc, [sp], #8 - -UNWIND(.fnend) -ENDPROC(__do_div64) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S deleted file mode 100644 index b5e8b9ae4c7d496dcd292e6437d3bebd6417a870..0000000000000000000000000000000000000000 --- a/arch/arm/lib/findbit.S +++ /dev/null @@ -1,193 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/findbit.S - * - * Copyright (C) 1995-2000 Russell King - * - * 16th March 2001 - John Ripley - * Fixed so that "size" is an exclusive not an inclusive quantity. - * All users of these functions expect exclusive sizes, and may - * also call with zero size. - * Reworked by rmk. - */ -#include -#include - .text - -/* - * Purpose : Find a 'zero' bit - * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit); - */ -ENTRY(_find_first_zero_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_zero_bit_le) - -/* - * Purpose : Find next 'zero' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_le) - -/* - * Purpose : Find a 'one' bit - * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit); - */ -ENTRY(_find_first_bit_le) - teq r1, #0 - beq 3f - mov r2, #0 -1: - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? 
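find_first_zero_bit scans a byte at a time, inverting each byte so a zero bit shows up as a set bit, and returns maxbit when nothing is found. A byte-granular C sketch of the little-endian variant (the kernel's real interface operates on unsigned long words):

    #include <stddef.h>

    static unsigned int find_first_zero_bit_ref(const unsigned char *addr,
                                                unsigned int maxbit)
    {
        for (unsigned int bit = 0; bit < maxbit; bit += 8) {
            unsigned char b = (unsigned char)~addr[bit / 8];  /* invert bits */
            if (b) {
                unsigned int pos = bit;
                while (!(b & 1)) {          /* lowest set bit of the byte */
                    b >>= 1;
                    pos++;
                }
                return pos < maxbit ? pos : maxbit;  /* clamp, like .L_found */
            }
        }
        return maxbit;                       /* no zero bit found */
    }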
- blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_le) - -/* - * Purpose : Find next 'one' bit - * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) - */ -ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - ARM( ldrb r3, [r0, r2, lsr #3] ) - THUMB( lsr r3, r2, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_le) - -#ifdef __ARMEB__ - -ENTRY(_find_first_zero_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eors r3, r3, #0xff @ invert bits - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_zero_bit_be) - -ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - eor r3, r3, #0xff @ now looking for a 1 bit - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_zero_bit_be) - -ENTRY(_find_first_bit_be) - teq r1, #0 - beq 3f - mov r2, #0 -1: eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3 - bne .L_found @ any now set - found zero bit - add r2, r2, #8 @ next bit pointer -2: cmp r2, r1 @ any more? - blo 1b -3: mov r0, r1 @ no free bits - ret lr -ENDPROC(_find_first_bit_be) - -ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b - ands ip, r2, #7 - beq 1b @ If new byte, goto old routine - eor r3, r2, #0x18 @ big endian byte ordering - ARM( ldrb r3, [r0, r3, lsr #3] ) - THUMB( lsr r3, #3 ) - THUMB( ldrb r3, [r0, r3] ) - movs r3, r3, lsr ip @ shift off unused bits - bne .L_found - orr r2, r2, #7 @ if zero, then no bits here - add r2, r2, #1 @ align bit pointer - b 2b @ loop for next bit -ENDPROC(_find_next_bit_be) - -#endif - -/* - * One or more bits in the LSB of r3 are assumed to be set. - */ -.L_found: -#if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 - clz r3, r3 - rsb r3, r3, #31 - add r0, r2, r3 -#else - tst r3, #0x0f - addeq r2, r2, #4 - movne r3, r3, lsl #4 - tst r3, #0x30 - addeq r2, r2, #2 - movne r3, r3, lsl #2 - tst r3, #0x40 - addeq r2, r2, #1 - mov r0, r2 -#endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 - ret lr - diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S deleted file mode 100644 index c5e420750c48d70374b216a70d3078add0fc0eb5..0000000000000000000000000000000000000000 --- a/arch/arm/lib/getuser.S +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/getuser.S - * - * Copyright (C) 2001 Russell King - * - * Idea from x86 version, (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface to make them more - * efficient, especially as they return an error value in addition to - * the "real" return value. 
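The .L_found epilogue above converts "some bit of r3 is set" into a bit index: the rsb/and pair isolates the lowest set bit, and CLZ then yields 31 minus the leading-zero count. The same trick in C, assuming the GCC/Clang builtin and a nonzero argument, exactly as the assembly assumes:

    #include <stdint.h>

    /* Index of the lowest set bit; x must be nonzero. */
    static unsigned int lowest_set_bit_index(uint32_t x)
    {
        uint32_t lsb = x & (0u - x);        /* isolate the lowest set bit */
        return 31u - (unsigned int)__builtin_clz(lsb);
    }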
- * - * __get_user_X - * - * Inputs: r0 contains the address - * r1 contains the address limit, which must be preserved - * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value - * lr corrupted - * - * No other registers must be altered. (see - * for specific ASM register usage). - * - * Note that ADDR_LIMIT is either 0 or 0xc0000000. - * Note also that it is intended that __get_user_bad is not global. - */ -#include -#include -#include -#include - -ENTRY(__get_user_1) - check_uaccess r0, 1, r1, r2, __get_user_bad -1: TUSER(ldrb) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_1) -_ASM_NOKPROBE(__get_user_1) - -ENTRY(__get_user_2) - check_uaccess r0, 2, r1, r2, __get_user_bad -#if __LINUX_ARM_ARCH__ >= 6 - -2: TUSER(ldrh) r2, [r0] - -#else - -#ifdef CONFIG_CPU_USE_DOMAINS -rb .req ip -2: ldrbt r2, [r0], #1 -3: ldrbt rb, [r0], #0 -#else -rb .req r0 -2: ldrb r2, [r0] -3: ldrb rb, [r0, #1] -#endif -#ifndef __ARMEB__ - orr r2, r2, rb, lsl #8 -#else - orr r2, rb, r2, lsl #8 -#endif - -#endif /* __LINUX_ARM_ARCH__ >= 6 */ - - mov r0, #0 - ret lr -ENDPROC(__get_user_2) -_ASM_NOKPROBE(__get_user_2) - -ENTRY(__get_user_4) - check_uaccess r0, 4, r1, r2, __get_user_bad -4: TUSER(ldr) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_4) -_ASM_NOKPROBE(__get_user_4) - -ENTRY(__get_user_8) - check_uaccess r0, 8, r1, r2, __get_user_bad8 -#ifdef CONFIG_THUMB2_KERNEL -5: TUSER(ldr) r2, [r0] -6: TUSER(ldr) r3, [r0, #4] -#else -5: TUSER(ldr) r2, [r0], #4 -6: TUSER(ldr) r3, [r0] -#endif - mov r0, #0 - ret lr -ENDPROC(__get_user_8) -_ASM_NOKPROBE(__get_user_8) - -#ifdef __ARMEB__ -ENTRY(__get_user_32t_8) - check_uaccess r0, 8, r1, r2, __get_user_bad -#ifdef CONFIG_CPU_USE_DOMAINS - add r0, r0, #4 -7: ldrt r2, [r0] -#else -7: ldr r2, [r0, #4] -#endif - mov r0, #0 - ret lr -ENDPROC(__get_user_32t_8) -_ASM_NOKPROBE(__get_user_32t_8) - -ENTRY(__get_user_64t_1) - check_uaccess r0, 1, r1, r2, __get_user_bad8 -8: TUSER(ldrb) r3, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_1) -_ASM_NOKPROBE(__get_user_64t_1) - -ENTRY(__get_user_64t_2) - check_uaccess r0, 2, r1, r2, __get_user_bad8 -#ifdef CONFIG_CPU_USE_DOMAINS -rb .req ip -9: ldrbt r3, [r0], #1 -10: ldrbt rb, [r0], #0 -#else -rb .req r0 -9: ldrb r3, [r0] -10: ldrb rb, [r0, #1] -#endif - orr r3, rb, r3, lsl #8 - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_2) -_ASM_NOKPROBE(__get_user_64t_2) - -ENTRY(__get_user_64t_4) - check_uaccess r0, 4, r1, r2, __get_user_bad8 -11: TUSER(ldr) r3, [r0] - mov r0, #0 - ret lr -ENDPROC(__get_user_64t_4) -_ASM_NOKPROBE(__get_user_64t_4) -#endif - -__get_user_bad8: - mov r3, #0 -__get_user_bad: - mov r2, #0 - mov r0, #-EFAULT - ret lr -ENDPROC(__get_user_bad) -ENDPROC(__get_user_bad8) -_ASM_NOKPROBE(__get_user_bad) -_ASM_NOKPROBE(__get_user_bad8) - -.pushsection __ex_table, "a" - .long 1b, __get_user_bad - .long 2b, __get_user_bad -#if __LINUX_ARM_ARCH__ < 6 - .long 3b, __get_user_bad -#endif - .long 4b, __get_user_bad - .long 5b, __get_user_bad8 - .long 6b, __get_user_bad8 -#ifdef __ARMEB__ - .long 7b, __get_user_bad - .long 8b, __get_user_bad8 - .long 9b, __get_user_bad8 - .long 10b, __get_user_bad8 - .long 11b, __get_user_bad8 -#endif -.popsection diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S deleted file mode 100644 index 0def9388fb1566ebd0b75ed1322c06d4b7542760..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsb.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsb.S - * - * Copyright (C) 1995-2000 
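The header comment pins down the contract: r0 comes back 0 or -EFAULT, and r2/r3 carry the value, zeroed on failure, with faulting loads redirected through the __ex_table fixups. A hypothetical user-space stand-in that models the same contract with a bounds check in place of a page fault:

    #include <errno.h>
    #include <string.h>

    /* Returns 0 and the value, or -EFAULT and a zeroed value. */
    static int get_user_u32_ref(const unsigned char *base, size_t size,
                                size_t off, unsigned int *out)
    {
        if (off > size || size - off < sizeof(*out)) {
            *out = 0;               /* mirrors __get_user_bad zeroing r2/r3 */
            return -EFAULT;
        }
        memcpy(out, base + off, sizeof(*out));
        return 0;
    }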
Russell King - */ -#include -#include - -.Linsb_align: rsb ip, ip, #4 - cmp ip, r2 - movgt ip, r2 - cmp ip, #2 - ldrb r3, [r0] - strb r3, [r1], #1 - ldrbge r3, [r0] - strbge r3, [r1], #1 - ldrbgt r3, [r0] - strbgt r3, [r1], #1 - subs r2, r2, ip - bne .Linsb_aligned - -ENTRY(__raw_readsb) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - ands ip, r1, #3 - bne .Linsb_align - -.Linsb_aligned: stmfd sp!, {r4 - r6, lr} - - subs r2, r2, #16 - bmi .Linsb_no_16 - -.Linsb_16_lp: ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - mov r3, r3, put_byte_0 - ldrb r6, [r0] - orr r3, r3, r4, put_byte_1 - ldrb r4, [r0] - orr r3, r3, r5, put_byte_2 - ldrb r5, [r0] - orr r3, r3, r6, put_byte_3 - ldrb r6, [r0] - mov r4, r4, put_byte_0 - ldrb ip, [r0] - orr r4, r4, r5, put_byte_1 - ldrb r5, [r0] - orr r4, r4, r6, put_byte_2 - ldrb r6, [r0] - orr r4, r4, ip, put_byte_3 - ldrb ip, [r0] - mov r5, r5, put_byte_0 - ldrb lr, [r0] - orr r5, r5, r6, put_byte_1 - ldrb r6, [r0] - orr r5, r5, ip, put_byte_2 - ldrb ip, [r0] - orr r5, r5, lr, put_byte_3 - ldrb lr, [r0] - mov r6, r6, put_byte_0 - orr r6, r6, ip, put_byte_1 - ldrb ip, [r0] - orr r6, r6, lr, put_byte_2 - orr r6, r6, ip, put_byte_3 - stmia r1!, {r3 - r6} - - subs r2, r2, #16 - bpl .Linsb_16_lp - - tst r2, #15 - ldmfdeq sp!, {r4 - r6, pc} - -.Linsb_no_16: tst r2, #8 - beq .Linsb_no_8 - - ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - mov r3, r3, put_byte_0 - ldrb r6, [r0] - orr r3, r3, r4, put_byte_1 - ldrb r4, [r0] - orr r3, r3, r5, put_byte_2 - ldrb r5, [r0] - orr r3, r3, r6, put_byte_3 - ldrb r6, [r0] - mov r4, r4, put_byte_0 - ldrb ip, [r0] - orr r4, r4, r5, put_byte_1 - orr r4, r4, r6, put_byte_2 - orr r4, r4, ip, put_byte_3 - stmia r1!, {r3, r4} - -.Linsb_no_8: tst r2, #4 - beq .Linsb_no_4 - - ldrb r3, [r0] - ldrb r4, [r0] - ldrb r5, [r0] - ldrb r6, [r0] - mov r3, r3, put_byte_0 - orr r3, r3, r4, put_byte_1 - orr r3, r3, r5, put_byte_2 - orr r3, r3, r6, put_byte_3 - str r3, [r1], #4 - -.Linsb_no_4: ands r2, r2, #3 - ldmfdeq sp!, {r4 - r6, pc} - - cmp r2, #2 - ldrb r3, [r0] - strb r3, [r1], #1 - ldrbge r3, [r0] - strbge r3, [r1], #1 - ldrbgt r3, [r0] - strbgt r3, [r1] - - ldmfd sp!, {r4 - r6, pc} -ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S deleted file mode 100644 index d9f6b372b0586e4b123ac149e08d5c322481cedd..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsl.S +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsl.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -ENTRY(__raw_readsl) - teq r2, #0 @ do we have to check for the zero len? 
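__raw_readsb streams bytes from one fixed device address into a memory buffer; the alignment prologue and the 16-byte word-packing loop are throughput optimizations around that single behavior. Stripped to its observable effect, with a volatile pointer standing in for a real MMIO accessor:

    #include <stddef.h>
    #include <stdint.h>

    static void raw_readsb_ref(const volatile uint8_t *port, uint8_t *buf,
                               size_t count)
    {
        while (count--)
            *buf++ = *port;          /* the port address never advances */
    }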
- reteq lr - ands ip, r1, #3 - bne 3f - - subs r2, r2, #4 - bmi 2f - stmfd sp!, {r4, lr} -1: ldr r3, [r0, #0] - ldr r4, [r0, #0] - ldr ip, [r0, #0] - ldr lr, [r0, #0] - subs r2, r2, #4 - stmia r1!, {r3, r4, ip, lr} - bpl 1b - ldmfd sp!, {r4, lr} -2: movs r2, r2, lsl #31 - ldrcs r3, [r0, #0] - ldrcs ip, [r0, #0] - stmiacs r1!, {r3, ip} - ldrne r3, [r0, #0] - strne r3, [r1, #0] - ret lr - -3: ldr r3, [r0] - cmp ip, #2 - mov ip, r3, get_byte_0 - strb ip, [r1], #1 - bgt 6f - mov ip, r3, get_byte_1 - strb ip, [r1], #1 - beq 5f - mov ip, r3, get_byte_2 - strb ip, [r1], #1 - -4: subs r2, r2, #1 - mov ip, r3, lspull #24 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #8 - strne ip, [r1], #4 - bne 4b - b 8f - -5: subs r2, r2, #1 - mov ip, r3, lspull #16 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #16 - strne ip, [r1], #4 - bne 5b - b 7f - -6: subs r2, r2, #1 - mov ip, r3, lspull #8 - ldrne r3, [r0] - orrne ip, ip, r3, lspush #24 - strne ip, [r1], #4 - bne 6b - - mov r3, ip, get_byte_2 - strb r3, [r1, #2] -7: mov r3, ip, get_byte_1 - strb r3, [r1, #1] -8: mov r3, ip, get_byte_0 - strb r3, [r1, #0] - ret lr -ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S deleted file mode 100644 index 266043610c0c19bbafb931292714fbf13f9f709b..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsw-armv3.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsw-armv3.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -.Linsw_bad_alignment: - adr r0, .Linsw_bad_align_msg - mov r2, lr - b panic -.Linsw_bad_align_msg: - .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n" - .align - -.Linsw_align: tst r1, #1 - bne .Linsw_bad_alignment - - ldr r3, [r0] - strb r3, [r1], #1 - mov r3, r3, lsr #8 - strb r3, [r1], #1 - - subs r2, r2, #1 - reteq lr - -ENTRY(__raw_readsw) - teq r2, #0 @ do we have to check for the zero len? 
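Paths 4, 5 and 6 of __raw_readsl handle a misaligned destination by carrying a partial word between iterations and merging each fresh read into it with the lspull/lspush shift pair. The merge step in C, little-endian flavor, where shift is the byte misalignment (1 to 3):

    #include <stdint.h>

    /* prev supplies the low bytes, next the high bytes of the output word. */
    static uint32_t combine_words(uint32_t prev, uint32_t next, unsigned int shift)
    {
        return (prev >> (8 * shift)) | (next << (32 - 8 * shift));
    }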
- reteq lr - tst r1, #3 - bne .Linsw_align - -.Linsw_aligned: mov ip, #0xff - orr ip, ip, ip, lsl #8 - stmfd sp!, {r4, r5, r6, lr} - - subs r2, r2, #8 - bmi .Lno_insw_8 - -.Linsw_8_lp: ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - ldr r4, [r0] - and r4, r4, ip - ldr r5, [r0] - orr r4, r4, r5, lsl #16 - - ldr r5, [r0] - and r5, r5, ip - ldr r6, [r0] - orr r5, r5, r6, lsl #16 - - ldr r6, [r0] - and r6, r6, ip - ldr lr, [r0] - orr r6, r6, lr, lsl #16 - - stmia r1!, {r3 - r6} - - subs r2, r2, #8 - bpl .Linsw_8_lp - - tst r2, #7 - ldmfdeq sp!, {r4, r5, r6, pc} - -.Lno_insw_8: tst r2, #4 - beq .Lno_insw_4 - - ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - ldr r4, [r0] - and r4, r4, ip - ldr r5, [r0] - orr r4, r4, r5, lsl #16 - - stmia r1!, {r3, r4} - -.Lno_insw_4: tst r2, #2 - beq .Lno_insw_2 - - ldr r3, [r0] - and r3, r3, ip - ldr r4, [r0] - orr r3, r3, r4, lsl #16 - - str r3, [r1], #4 - -.Lno_insw_2: tst r2, #1 - ldrne r3, [r0] - strbne r3, [r1], #1 - movne r3, r3, lsr #8 - strbne r3, [r1] - - ldmfd sp!, {r4, r5, r6, pc} - - diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S deleted file mode 100644 index 228c176a94d1e4b5d5d27e79da6817ba732f15a9..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-readsw-armv4.S +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-readsw-armv4.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - - .macro pack, rd, hw1, hw2 -#ifndef __ARMEB__ - orr \rd, \hw1, \hw2, lsl #16 -#else - orr \rd, \hw2, \hw1, lsl #16 -#endif - .endm - -.Linsw_align: movs ip, r1, lsl #31 - bne .Linsw_noalign - ldrh ip, [r0] - sub r2, r2, #1 - strh ip, [r1], #2 - -ENTRY(__raw_readsw) - teq r2, #0 - reteq lr - tst r1, #3 - bne .Linsw_align - - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #8 - bmi .Lno_insw_8 - -.Linsw_8_lp: ldrh r3, [r0] - ldrh r4, [r0] - pack r3, r3, r4 - - ldrh r4, [r0] - ldrh r5, [r0] - pack r4, r4, r5 - - ldrh r5, [r0] - ldrh ip, [r0] - pack r5, r5, ip - - ldrh ip, [r0] - ldrh lr, [r0] - pack ip, ip, lr - - subs r2, r2, #8 - stmia r1!, {r3 - r5, ip} - bpl .Linsw_8_lp - -.Lno_insw_8: tst r2, #4 - beq .Lno_insw_4 - - ldrh r3, [r0] - ldrh r4, [r0] - pack r3, r3, r4 - - ldrh r4, [r0] - ldrh ip, [r0] - pack r4, r4, ip - - stmia r1!, {r3, r4} - -.Lno_insw_4: movs r2, r2, lsl #31 - bcc .Lno_insw_2 - - ldrh r3, [r0] - ldrh ip, [r0] - pack r3, r3, ip - str r3, [r1], #4 - -.Lno_insw_2: ldrhne r3, [r0] - strhne r3, [r1] - - ldmfd sp!, {r4, r5, pc} - -#ifdef __ARMEB__ -#define _BE_ONLY_(code...) code -#define _LE_ONLY_(code...) -#define push_hbyte0 lsr #8 -#define pull_hbyte1 lsl #24 -#else -#define _BE_ONLY_(code...) -#define _LE_ONLY_(code...) code -#define push_hbyte0 lsl #24 -#define pull_hbyte1 lsr #8 -#endif - -.Linsw_noalign: stmfd sp!, {r4, lr} - ldrbcc ip, [r1, #-1]! 
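The pack macro above fuses two halfword reads into one word store, swapping operand order on big-endian builds; that is the entire difference the __ARMEB__ conditionals express. In C:

    #include <stdint.h>

    static uint32_t pack_halfwords(uint16_t hw1, uint16_t hw2, int big_endian)
    {
        return big_endian ? ((uint32_t)hw1 << 16) | hw2     /* hw1 on top */
                          : ((uint32_t)hw2 << 16) | hw1;    /* hw2 on top */
    }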
- bcc 1f - - ldrh ip, [r0] - sub r2, r2, #1 - _BE_ONLY_( mov ip, ip, ror #8 ) - strb ip, [r1], #1 - _LE_ONLY_( mov ip, ip, lsr #8 ) - _BE_ONLY_( mov ip, ip, lsr #24 ) - -1: subs r2, r2, #2 - bmi 3f - _BE_ONLY_( mov ip, ip, lsl #24 ) - -2: ldrh r3, [r0] - ldrh r4, [r0] - subs r2, r2, #2 - orr ip, ip, r3, lsl #8 - orr ip, ip, r4, push_hbyte0 - str ip, [r1], #4 - mov ip, r4, pull_hbyte1 - bpl 2b - - _BE_ONLY_( mov ip, ip, lsr #24 ) - -3: tst r2, #1 - strb ip, [r1], #1 - ldrhne ip, [r0] - _BE_ONLY_( movne ip, ip, ror #8 ) - strbne ip, [r1], #1 - _LE_ONLY_( movne ip, ip, lsr #8 ) - _BE_ONLY_( movne ip, ip, lsr #24 ) - strbne ip, [r1] - ldmfd sp!, {r4, pc} -ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S deleted file mode 100644 index e2ae312f0b69101469a11a918db55e56554a9438..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesb.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesb.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - - .macro outword, rd -#ifndef __ARMEB__ - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] - mov \rd, \rd, lsr #8 - strb \rd, [r0] -#else - mov lr, \rd, lsr #24 - strb lr, [r0] - mov lr, \rd, lsr #16 - strb lr, [r0] - mov lr, \rd, lsr #8 - strb lr, [r0] - strb \rd, [r0] -#endif - .endm - -.Loutsb_align: rsb ip, ip, #4 - cmp ip, r2 - movgt ip, r2 - cmp ip, #2 - ldrb r3, [r1], #1 - strb r3, [r0] - ldrbge r3, [r1], #1 - strbge r3, [r0] - ldrbgt r3, [r1], #1 - strbgt r3, [r0] - subs r2, r2, ip - bne .Loutsb_aligned - -ENTRY(__raw_writesb) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - ands ip, r1, #3 - bne .Loutsb_align - -.Loutsb_aligned: - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #16 - bmi .Loutsb_no_16 - -.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip} - outword r3 - outword r4 - outword r5 - outword ip - subs r2, r2, #16 - bpl .Loutsb_16_lp - - tst r2, #15 - ldmfdeq sp!, {r4, r5, pc} - -.Loutsb_no_16: tst r2, #8 - beq .Loutsb_no_8 - - ldmia r1!, {r3, r4} - outword r3 - outword r4 - -.Loutsb_no_8: tst r2, #4 - beq .Loutsb_no_4 - - ldr r3, [r1], #4 - outword r3 - -.Loutsb_no_4: ands r2, r2, #3 - ldmfdeq sp!, {r4, r5, pc} - - cmp r2, #2 - ldrb r3, [r1], #1 - strb r3, [r0] - ldrbge r3, [r1], #1 - strbge r3, [r0] - ldrbgt r3, [r1] - strbgt r3, [r0] - - ldmfd sp!, {r4, r5, pc} -ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S deleted file mode 100644 index 89ef7be61421918f8c6cc74769ba7c22ca2e3382..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesl.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesl.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -ENTRY(__raw_writesl) - teq r2, #0 @ do we have to check for the zero len? 
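__raw_writesb's outword macro is the write-side mirror of the read loops: one 32-bit value leaves as four byte-wide stores to the same port, low byte first on little-endian. A sketch:

    #include <stdint.h>

    static void outword_le(volatile uint8_t *port, uint32_t w)
    {
        for (int i = 0; i < 4; i++) {
            *port = (uint8_t)w;      /* same address every time */
            w >>= 8;
        }
    }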
- reteq lr - ands ip, r1, #3 - bne 3f - - subs r2, r2, #4 - bmi 2f - stmfd sp!, {r4, lr} -1: ldmia r1!, {r3, r4, ip, lr} - subs r2, r2, #4 - str r3, [r0, #0] - str r4, [r0, #0] - str ip, [r0, #0] - str lr, [r0, #0] - bpl 1b - ldmfd sp!, {r4, lr} -2: movs r2, r2, lsl #31 - ldmiacs r1!, {r3, ip} - strcs r3, [r0, #0] - ldrne r3, [r1, #0] - strcs ip, [r0, #0] - strne r3, [r0, #0] - ret lr - -3: bic r1, r1, #3 - ldr r3, [r1], #4 - cmp ip, #2 - blt 5f - bgt 6f - -4: mov ip, r3, lspull #16 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #16 - str ip, [r0] - bne 4b - ret lr - -5: mov ip, r3, lspull #8 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #24 - str ip, [r0] - bne 5b - ret lr - -6: mov ip, r3, lspull #24 - ldr r3, [r1], #4 - subs r2, r2, #1 - orr ip, ip, r3, lspush #8 - str ip, [r0] - bne 6b - ret lr -ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S deleted file mode 100644 index 4cabbee7f3b8278ddf165f6ec2ab065f0e26ca2f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesw-armv3.S +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesw-armv3.S - * - * Copyright (C) 1995-2000 Russell King - */ -#include -#include - -.Loutsw_bad_alignment: - adr r0, .Loutsw_bad_align_msg - mov r2, lr - b panic -.Loutsw_bad_align_msg: - .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n" - .align - -.Loutsw_align: tst r1, #1 - bne .Loutsw_bad_alignment - - add r1, r1, #2 - - ldr r3, [r1, #-4] - mov r3, r3, lsr #16 - orr r3, r3, r3, lsl #16 - str r3, [r0] - subs r2, r2, #1 - reteq lr - -ENTRY(__raw_writesw) - teq r2, #0 @ do we have to check for the zero len? - reteq lr - tst r1, #3 - bne .Loutsw_align - - stmfd sp!, {r4, r5, r6, lr} - - subs r2, r2, #8 - bmi .Lno_outsw_8 - -.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6} - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r4, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r4, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r5, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r5, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r6, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r6, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - subs r2, r2, #8 - bpl .Loutsw_8_lp - - tst r2, #7 - ldmfdeq sp!, {r4, r5, r6, pc} - -.Lno_outsw_8: tst r2, #4 - beq .Lno_outsw_4 - - ldmia r1!, {r3, r4} - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - - mov ip, r4, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r4, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - -.Lno_outsw_4: tst r2, #2 - beq .Lno_outsw_2 - - ldr r3, [r1], #4 - - mov ip, r3, lsl #16 - orr ip, ip, ip, lsr #16 - str ip, [r0] - - mov ip, r3, lsr #16 - orr ip, ip, ip, lsl #16 - str ip, [r0] - -.Lno_outsw_2: tst r2, #1 - - ldrne r3, [r1] - - movne ip, r3, lsl #16 - orrne ip, ip, ip, lsr #16 - strne ip, [r0] - - ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S deleted file mode 100644 index 12eec53266c74dbd569fb4a3d673794faa5854c2..0000000000000000000000000000000000000000 --- a/arch/arm/lib/io-writesw-armv4.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-writesw-armv4.S - * - * Copyright (C) 
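io-writesw-armv3 drives a 16-bit port through 32-bit stores, so each halfword is first replicated into both halves of the word; every lsl #16 / orr ..., lsr #16 pair above is that replication. In C:

    #include <stdint.h>

    static uint32_t replicate_halfword(uint16_t hw)
    {
        return ((uint32_t)hw << 16) | hw;    /* hw in both halves */
    }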
1995-2000 Russell King - */ -#include -#include - - .macro outword, rd -#ifndef __ARMEB__ - strh \rd, [r0] - mov \rd, \rd, lsr #16 - strh \rd, [r0] -#else - mov lr, \rd, lsr #16 - strh lr, [r0] - strh \rd, [r0] -#endif - .endm - -.Loutsw_align: movs ip, r1, lsl #31 - bne .Loutsw_noalign - - ldrh r3, [r1], #2 - sub r2, r2, #1 - strh r3, [r0] - -ENTRY(__raw_writesw) - teq r2, #0 - reteq lr - ands r3, r1, #3 - bne .Loutsw_align - - stmfd sp!, {r4, r5, lr} - - subs r2, r2, #8 - bmi .Lno_outsw_8 - -.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip} - subs r2, r2, #8 - outword r3 - outword r4 - outword r5 - outword ip - bpl .Loutsw_8_lp - -.Lno_outsw_8: tst r2, #4 - beq .Lno_outsw_4 - - ldmia r1!, {r3, ip} - outword r3 - outword ip - -.Lno_outsw_4: movs r2, r2, lsl #31 - bcc .Lno_outsw_2 - - ldr r3, [r1], #4 - outword r3 - -.Lno_outsw_2: ldrhne r3, [r1] - strhne r3, [r0] - - ldmfd sp!, {r4, r5, pc} - -#ifdef __ARMEB__ -#define pull_hbyte0 lsl #8 -#define push_hbyte1 lsr #24 -#else -#define pull_hbyte0 lsr #24 -#define push_hbyte1 lsl #8 -#endif - -.Loutsw_noalign: - ARM( ldr r3, [r1, -r3]! ) - THUMB( rsb r3, r3, #0 ) - THUMB( ldr r3, [r1, r3] ) - THUMB( sub r1, r3 ) - subcs r2, r2, #1 - bcs 2f - subs r2, r2, #2 - bmi 3f - -1: mov ip, r3, lsr #8 - strh ip, [r0] -2: mov ip, r3, pull_hbyte0 - ldr r3, [r1, #4]! - subs r2, r2, #2 - orr ip, ip, r3, push_hbyte1 - strh ip, [r0] - bpl 1b - - tst r2, #1 -3: movne ip, r3, lsr #8 - strhne ip, [r0] - ret lr -ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S deleted file mode 100644 index c23f9d9e29704be4c834185a22d8ca9eefef7013..0000000000000000000000000000000000000000 --- a/arch/arm/lib/lib1funcs.S +++ /dev/null @@ -1,371 +0,0 @@ -/* - * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines - * - * Author: Nicolas Pitre - * - contributed to gcc-3.4 on Sep 30, 2003 - * - adapted for the Linux kernel on Oct 2, 2003 - */ - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. 
*/ - - -#include -#include -#include - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed subtractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
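ARM_DIV_BODY aligns the divisor under the dividend (via CLZ on v5+, shift heuristics earlier) and then retires quotient bits by trial subtraction at four adjacent bit positions per unrolled pass. A one-bit-per-step C sketch of the same restoring division, omitting the unrolling:

    #include <stdint.h>

    static uint32_t udiv_ref(uint32_t dividend, uint32_t divisor, uint32_t *rem)
    {
        uint32_t result = 0, curbit = 1;

        if (divisor == 0) {                  /* the original traps via Ldiv0 */
            *rem = dividend;
            return 0;
        }
        /* Align: shift the divisor up until it tops the dividend. */
        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            curbit <<= 1;
        }
        while (curbit) {                     /* unrolled 4x in the original */
            if (dividend >= divisor) {
                dividend -= divisor;
                result |= curbit;
            }
            divisor >>= 1;
            curbit >>= 1;
        }
        *rem = dividend;
        return result;
    }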
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subsge \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/subtractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - -#ifdef CONFIG_ARM_PATCH_IDIV - .align 3 -#endif - -ENTRY(__udivsi3) -ENTRY(__aeabi_uidiv) -UNWIND(.fnstart) - - subs r2, r1, #1 - reteq lr - bcc Ldiv0 - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - ret lr - -11: moveq r0, #1 - movne r0, #0 - ret lr - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - ret lr - -UNWIND(.fnend) -ENDPROC(__udivsi3) -ENDPROC(__aeabi_uidiv) - -ENTRY(__umodsi3) -UNWIND(.fnstart) - - subs r2, r1, #1 @ compare divisor with 1 - bcc Ldiv0 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - retls lr - - ARM_MOD_BODY r0, r1, r2, r3 - - ret lr - -UNWIND(.fnend) -ENDPROC(__umodsi3) - -#ifdef CONFIG_ARM_PATCH_IDIV - .align 3 -#endif - -ENTRY(__divsi3) -ENTRY(__aeabi_idiv) -UNWIND(.fnstart) - - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - ret lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - ret lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - ret lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - ret lr - -UNWIND(.fnend) -ENDPROC(__divsi3) -ENDPROC(__aeabi_idiv) - -ENTRY(__modsi3) -UNWIND(.fnstart) - - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - ret lr - -UNWIND(.fnend) -ENDPROC(__modsi3) - -#ifdef CONFIG_AEABI - -ENTRY(__aeabi_uidivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - ret lr - -UNWIND(.fnend) -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) -UNWIND(.fnstart) -UNWIND(.save {r0, r1, ip, lr} ) - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - ret lr - -UNWIND(.fnend) -ENDPROC(__aeabi_idivmod) - -#endif - -Ldiv0: -UNWIND(.fnstart) -UNWIND(.pad #4) -UNWIND(.save {lr}) - str lr, [sp, #-8]! 
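__divsi3 reduces signed division to the unsigned core: eor ip, r0, r1 records the result sign up front, the magnitudes are divided unsigned, and the quotient is negated if the signs differed, with divisors of 1, -1 and powers of two peeled off first. The sign plumbing in C (ub != 0 assumed; the plain / stands in for the unsigned divide, purely for brevity):

    #include <stdint.h>

    static int32_t divsi3_ref(int32_t a, int32_t b)
    {
        uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
        uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
        uint32_t q = ua / ub;                /* magnitude quotient */

        /* Negate via unsigned arithmetic to sidestep signed overflow. */
        return ((a ^ b) < 0) ? (int32_t)(0u - q) : (int32_t)q;
    }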
- bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #8 -UNWIND(.fnend) -ENDPROC(Ldiv0) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S deleted file mode 100644 index 922dcd88b02b7804fca63f0d891e9a7ed6cbf83e..0000000000000000000000000000000000000000 --- a/arch/arm/lib/lshrdi3.S +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. - -In addition to the permissions in the GNU General Public License, the -Free Software Foundation gives you unlimited permission to link the -compiled version of this file into combinations with other programs, -and to distribute those combinations without any restriction coming -from the use of this file. (The General Public License restrictions -do apply in other respects; for example, they cover modification of -the file, and distribution when not linked into a combine -executable.) - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ - - -#include -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -ENTRY(__lshrdi3) -ENTRY(__aeabi_llsr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - ARM( orrmi al, al, ah, lsl ip ) - THUMB( lslmi r3, ah, ip ) - THUMB( orrmi al, al, r3 ) - mov ah, ah, lsr r2 - ret lr - -ENDPROC(__lshrdi3) -ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S deleted file mode 100644 index 95bedafd0330561f405a0d6af80bd156e6fa2c09..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memchr.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(memchr) -1: subs r2, r2, #1 - bmi 2f - ldrb r3, [r0], #1 - teq r3, r1 - bne 1b - sub r0, r0, #1 -2: movne r0, #0 - ret lr -ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S deleted file mode 100644 index 09a333153dc66409e4cbf81cb657253fa24fb428..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memcpy.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memcpy.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: MontaVista Software, Inc. 
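__aeabi_llsr composes a 64-bit logical right shift from two 32-bit halves: bits fall out of the high word into the low word, and the movmi/movpl pair selects between the small-shift and large-shift cases. In C, for shift counts below 64:

    #include <stdint.h>

    static uint64_t llsr_ref(uint32_t lo, uint32_t hi, unsigned int n)
    {
        uint32_t new_lo, new_hi;

        if (n == 0) {
            new_lo = lo;
            new_hi = hi;
        } else if (n < 32) {
            new_lo = (lo >> n) | (hi << (32 - n));   /* high spills into low */
            new_hi = hi >> n;
        } else {                   /* 32..63: low word comes from high word */
            new_lo = hi >> (n - 32);
            new_hi = 0;
        }
        return ((uint64_t)new_hi << 32) | new_lo;
    }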
- */ - -#include -#include -#include - -#define LDR1W_SHIFT 0 -#define STR1W_SHIFT 0 - - .macro ldr1w ptr reg abort - W(ldr) \reg, [\ptr], #4 - .endm - - .macro ldr4w ptr reg1 reg2 reg3 reg4 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} - .endm - - .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro ldr1b ptr reg cond=al abort - ldrb\cond \reg, [\ptr], #1 - .endm - - .macro str1w ptr reg abort - W(str) \reg, [\ptr], #4 - .endm - - .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort - stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} - .endm - - .macro str1b ptr reg cond=al abort - strb\cond \reg, [\ptr], #1 - .endm - - .macro enter reg1 reg2 - stmdb sp!, {r0, \reg1, \reg2} - .endm - - .macro usave reg1 reg2 - UNWIND( .save {r0, \reg1, \reg2} ) - .endm - - .macro exit reg1 reg2 - ldmfd sp!, {r0, \reg1, \reg2} - .endm - - .text - -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ - -ENTRY(mmiocpy) -ENTRY(memcpy) - -#include "copy_template.S" - -ENDPROC(memcpy) -ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S deleted file mode 100644 index b50e5770fb44de25d37a6818f468c9ec3795ba91..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memmove.S +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memmove.S - * - * Author: Nicolas Pitre - * Created: Sep 28, 2005 - * Copyright: (C) MontaVista Software Inc. - */ - -#include -#include -#include - - .text - -/* - * Prototype: void *memmove(void *dest, const void *src, size_t n); - * - * Note: - * - * If the memory regions don't overlap, we simply branch to memcpy which is - * normally a bit faster. Otherwise the copy is done going downwards. This - * is a transposition of the code from copy_template.S but with the copy - * occurring in the opposite direction. - */ - -ENTRY(memmove) - UNWIND( .fnstart ) - - subs ip, r0, r1 - cmphi r2, ip - bls memcpy - - stmfd sp!, {r0, r4, lr} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ in first stmfd block - add r1, r1, r2 - add r0, r0, r2 - subs r2, r2, #4 - blt 8f - ands ip, r0, #3 - PLD( pld [r1, #-4] ) - bne 9f - ands ip, r1, #3 - bne 10f - -1: subs r2, r2, #(28) - stmfd sp!, {r5 - r8} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) - UNWIND( .save {r5 - r8} ) @ in second stmfd block - blt 5f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( bcs 2f ) - CALGN( adr r4, 6f ) - CALGN( subs r2, r2, ip ) @ C is set here - CALGN( rsb ip, ip, #32 ) - CALGN( add pc, r4, ip ) - - PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 4f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -3: PLD( pld [r1, #-128] ) -4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} - subs r2, r2, #32 - stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) - -5: ands ip, r2, #28 - rsb ip, ip, #32 - addne pc, pc, ip @ C is always clear here - b 7f -6: W(nop) - W(ldr) r3, [r1, #-4]! - W(ldr) r4, [r1, #-4]! - W(ldr) r5, [r1, #-4]! - W(ldr) r6, [r1, #-4]! - W(ldr) r7, [r1, #-4]! - W(ldr) r8, [r1, #-4]! - W(ldr) lr, [r1, #-4]! - - add pc, pc, ip - nop - W(nop) - W(str) r3, [r0, #-4]! - W(str) r4, [r0, #-4]! - W(str) r5, [r0, #-4]! - W(str) r6, [r0, #-4]! - W(str) r7, [r0, #-4]! - W(str) r8, [r0, #-4]! - W(str) lr, [r0, #-4]! 
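memmove's first three instructions decide everything: a forward copy (a straight branch to memcpy) is safe unless the destination starts inside the source, i.e. dst > src and dst - src < n; only then does the code copy downwards. The dispatch in C:

    #include <stddef.h>
    #include <string.h>

    static void *memmove_ref(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (d <= s || (size_t)(d - s) >= n)
            return memcpy(dst, src, n);      /* no destructive overlap */
        while (n--)                          /* otherwise copy downwards */
            d[n] = s[n];
        return dst;
    }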
- - CALGN( bcs 2b ) - -7: ldmfd sp!, {r5 - r8} - UNWIND( .fnend ) @ end of second stmfd block - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - -8: movs r2, r2, lsl #31 - ldrbne r3, [r1, #-1]! - ldrbcs r4, [r1, #-1]! - ldrbcs ip, [r1, #-1] - strbne r3, [r0, #-1]! - strbcs r4, [r0, #-1]! - strbcs ip, [r0, #-1] - ldmfd sp!, {r0, r4, pc} - -9: cmp ip, #2 - ldrbgt r3, [r1, #-1]! - ldrbge r4, [r1, #-1]! - ldrb lr, [r1, #-1]! - strbgt r3, [r0, #-1]! - strbge r4, [r0, #-1]! - subs r2, r2, ip - strb lr, [r0, #-1]! - blt 8b - ands ip, r1, #3 - beq 1b - -10: bic r1, r1, #3 - cmp ip, #2 - ldr r3, [r1, #0] - beq 17f - blt 18f - UNWIND( .fnend ) - - - .macro backward_copy_shift push pull - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - subs r2, r2, #28 - blt 14f - - CALGN( ands ip, r0, #31 ) - CALGN( sbcsne r4, ip, r2 ) @ C is always set here - CALGN( subcc r2, r2, ip ) - CALGN( bcc 15f ) - -11: stmfd sp!, {r5 - r9} - UNWIND( .fnend ) - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) - UNWIND( .save {r5 - r9} ) @ in new second stmfd block - - PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) - PLD( pld [r1, #-32] ) - PLD( blt 13f ) - PLD( pld [r1, #-64] ) - PLD( pld [r1, #-96] ) - -12: PLD( pld [r1, #-128] ) -13: ldmdb r1!, {r7, r8, r9, ip} - mov lr, r3, lspush #\push - subs r2, r2, #32 - ldmdb r1!, {r3, r4, r5, r6} - orr lr, lr, ip, lspull #\pull - mov ip, ip, lspush #\push - orr ip, ip, r9, lspull #\pull - mov r9, r9, lspush #\push - orr r9, r9, r8, lspull #\pull - mov r8, r8, lspush #\push - orr r8, r8, r7, lspull #\pull - mov r7, r7, lspush #\push - orr r7, r7, r6, lspull #\pull - mov r6, r6, lspush #\push - orr r6, r6, r5, lspull #\pull - mov r5, r5, lspush #\push - orr r5, r5, r4, lspull #\pull - mov r4, r4, lspush #\push - orr r4, r4, r3, lspull #\pull - stmdb r0!, {r4 - r9, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) - - ldmfd sp!, {r5 - r9} - UNWIND( .fnend ) @ end of the second stmfd block - - UNWIND( .fnstart ) - UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block - -14: ands ip, r2, #28 - beq 16f - -15: mov lr, r3, lspush #\push - ldr r3, [r1, #-4]! - subs ip, ip, #4 - orr lr, lr, r3, lspull #\pull - str lr, [r0, #-4]! - bgt 15b - CALGN( cmp r2, #0 ) - CALGN( bge 11b ) - -16: add r1, r1, #(\pull / 8) - b 8b - UNWIND( .fnend ) - - .endm - - - backward_copy_shift push=8 pull=24 - -17: backward_copy_shift push=16 pull=16 - -18: backward_copy_shift push=24 pull=8 - -ENDPROC(memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S deleted file mode 100644 index 6ca4535c47fb63dd48fc15c9c91a60ee34552def..0000000000000000000000000000000000000000 --- a/arch/arm/lib/memset.S +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/memset.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include -#include - - .text - .align 5 - -ENTRY(mmioset) -ENTRY(memset) -UNWIND( .fnstart ) - ands r3, r0, #3 @ 1 unaligned? - mov ip, r0 @ preserve r0 as return value - bne 6f @ 1 -/* - * we know that the pointer in ip is aligned to a word boundary. - */ -1: orr r1, r1, r1, lsl #8 - orr r1, r1, r1, lsl #16 - mov r3, r1 -7: cmp r2, #16 - blt 4f - -#if ! 
CALGN(1)+0 - -/* - * We need 2 extra registers for this loop - use r8 and the LR - */ - stmfd sp!, {r8, lr} -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {r8, lr} ) - mov r8, r1 - mov lr, r3 - -2: subs r2, r2, #64 - stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time. - stmiage ip!, {r1, r3, r8, lr} - stmiage ip!, {r1, r3, r8, lr} - stmiage ip!, {r1, r3, r8, lr} - bgt 2b - ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go. -/* - * No need to correct the count; we're only testing bits from now on - */ - tst r2, #32 - stmiane ip!, {r1, r3, r8, lr} - stmiane ip!, {r1, r3, r8, lr} - tst r2, #16 - stmiane ip!, {r1, r3, r8, lr} - ldmfd sp!, {r8, lr} -UNWIND( .fnend ) - -#else - -/* - * This version aligns the destination pointer in order to write - * whole cache lines at once. - */ - - stmfd sp!, {r4-r8, lr} -UNWIND( .fnend ) -UNWIND( .fnstart ) -UNWIND( .save {r4-r8, lr} ) - mov r4, r1 - mov r5, r3 - mov r6, r1 - mov r7, r3 - mov r8, r1 - mov lr, r3 - - cmp r2, #96 - tstgt ip, #31 - ble 3f - - and r8, ip, #31 - rsb r8, r8, #32 - sub r2, r2, r8 - movs r8, r8, lsl #(32 - 4) - stmiacs ip!, {r4, r5, r6, r7} - stmiami ip!, {r4, r5} - tst r8, #(1 << 30) - mov r8, r1 - strne r1, [ip], #4 - -3: subs r2, r2, #64 - stmiage ip!, {r1, r3-r8, lr} - stmiage ip!, {r1, r3-r8, lr} - bgt 3b - ldmfdeq sp!, {r4-r8, pc} - - tst r2, #32 - stmiane ip!, {r1, r3-r8, lr} - tst r2, #16 - stmiane ip!, {r4-r7} - ldmfd sp!, {r4-r8, lr} -UNWIND( .fnend ) - -#endif - -UNWIND( .fnstart ) -4: tst r2, #8 - stmiane ip!, {r1, r3} - tst r2, #4 - strne r1, [ip], #4 -/* - * When we get here, we've got less than 4 bytes to set. We - * may have an unaligned pointer as well. - */ -5: tst r2, #2 - strbne r1, [ip], #1 - strbne r1, [ip], #1 - tst r2, #1 - strbne r1, [ip], #1 - ret lr - -6: subs r2, r2, #4 @ 1 do we have enough - blt 5b @ 1 bytes to align with? - cmp r3, #2 @ 1 - strblt r1, [ip], #1 @ 1 - strble r1, [ip], #1 @ 1 - strb r1, [ip], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) - b 1b -UNWIND( .fnend ) -ENDPROC(memset) -ENDPROC(mmioset) - -ENTRY(__memset32) -UNWIND( .fnstart ) - mov r3, r1 @ copy r1 to r3 and fall into memset64 -UNWIND( .fnend ) -ENDPROC(__memset32) -ENTRY(__memset64) -UNWIND( .fnstart ) - mov ip, r0 @ preserve r0 as return value - b 7b @ jump into the middle of memset -UNWIND( .fnend ) -ENDPROC(__memset64) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S deleted file mode 100644 index 8362fe6c0de9d71a344d913c3fada986e9e5234f..0000000000000000000000000000000000000000 --- a/arch/arm/lib/muldi3.S +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/muldi3.S - * - * Author: Nicolas Pitre - * Created: Oct 19, 2005 - * Copyright: Monta Vista Software, Inc. 
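memset's two opening orr instructions splat the fill byte across a full word so the bulk loops can issue 16- or 64-byte stores instead of byte stores. The splat in C:

    #include <stdint.h>

    static uint32_t splat_byte(uint8_t c)
    {
        uint32_t w = c;

        w |= w << 8;      /* 0x000000cc -> 0x0000cccc */
        w |= w << 16;     /* 0x0000cccc -> 0xcccccccc */
        return w;
    }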
- */ - -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -ENTRY(__muldi3) -ENTRY(__aeabi_lmul) - - mul xh, yl, xh - mla xh, xl, yh, xh - mov ip, xl, lsr #16 - mov yh, yl, lsr #16 - bic xl, xl, ip, lsl #16 - bic yl, yl, yh, lsl #16 - mla xh, yh, ip, xh - mul yh, xl, yh - mul xl, yl, xl - mul ip, yl, ip - adds xl, xl, yh, lsl #16 - adc xh, xh, yh, lsr #16 - adds xl, xl, ip, lsl #16 - adc xh, xh, ip, lsr #16 - ret lr - -ENDPROC(__muldi3) -ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S deleted file mode 100644 index bdd8836dc5c25ecfcb51efbacf4bde858daac5f8..0000000000000000000000000000000000000000 --- a/arch/arm/lib/putuser.S +++ /dev/null @@ -1,95 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/putuser.S - * - * Copyright (C) 2001 Russell King - * - * Idea from x86 version, (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface to make - * them more efficient, especially as they return an error - * value in addition to the "real" return value. - * - * __put_user_X - * - * Inputs: r0 contains the address - * r1 contains the address limit, which must be preserved - * r2, r3 contains the value - * Outputs: r0 is the error code - * lr corrupted - * - * No other registers must be altered. (see - * for specific ASM register usage). - * - * Note that ADDR_LIMIT is either 0 or 0xc0000000 - * Note also that it is intended that __put_user_bad is not global. - */ -#include -#include -#include -#include - -ENTRY(__put_user_1) - check_uaccess r0, 1, r1, ip, __put_user_bad -1: TUSER(strb) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - check_uaccess r0, 2, r1, ip, __put_user_bad -#if __LINUX_ARM_ARCH__ >= 6 - -2: TUSER(strh) r2, [r0] - -#else - - mov ip, r2, lsr #8 -#ifndef __ARMEB__ -2: TUSER(strb) r2, [r0], #1 -3: TUSER(strb) ip, [r0] -#else -2: TUSER(strb) ip, [r0], #1 -3: TUSER(strb) r2, [r0] -#endif - -#endif /* __LINUX_ARM_ARCH__ >= 6 */ - mov r0, #0 - ret lr -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - check_uaccess r0, 4, r1, ip, __put_user_bad -4: TUSER(str) r2, [r0] - mov r0, #0 - ret lr -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - check_uaccess r0, 8, r1, ip, __put_user_bad -#ifdef CONFIG_THUMB2_KERNEL -5: TUSER(str) r2, [r0] -6: TUSER(str) r3, [r0, #4] -#else -5: TUSER(str) r2, [r0], #4 -6: TUSER(str) r3, [r0] -#endif - mov r0, #0 - ret lr -ENDPROC(__put_user_8) - -__put_user_bad: - mov r0, #-EFAULT - ret lr -ENDPROC(__put_user_bad) - -.pushsection __ex_table, "a" - .long 1b, __put_user_bad - .long 2b, __put_user_bad -#if __LINUX_ARM_ARCH__ < 6 - .long 3b, __put_user_bad -#endif - .long 4b, __put_user_bad - .long 5b, __put_user_bad - .long 6b, __put_user_bad -.popsection diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S deleted file mode 100644 index 19a96f43f4bbe71ddb697fb6af760f7ba45df499..0000000000000000000000000000000000000000 --- a/arch/arm/lib/setbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/setbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -bitop _set_bit, orr diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S deleted file mode 100644 index 09e2cc8a89501a0f9ace6f81cb2a3256d68cd105..0000000000000000000000000000000000000000 --- a/arch/arm/lib/strchr.S +++ /dev/null @@ -1,24 +0,0 @@ -/* 
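__aeabi_lmul builds a 64x64->64 multiply out of 32-bit mul/mla: the two cross products land in the high word, and the low 32x32 product is reconstructed from 16-bit halves, which is all the ip/yh juggling above. The decomposition in C:

    #include <stdint.h>

    static uint64_t lmul_ref(uint32_t xl, uint32_t xh, uint32_t yl, uint32_t yh)
    {
        uint32_t high = xh * yl + xl * yh;         /* cross products, mod 2^32 */
        uint32_t xl_hi = xl >> 16, xl_lo = xl & 0xffff;
        uint32_t yl_hi = yl >> 16, yl_lo = yl & 0xffff;

        uint64_t low = (uint64_t)xl_lo * yl_lo
                     + ((uint64_t)xl_lo * yl_hi << 16)
                     + ((uint64_t)xl_hi * yl_lo << 16);
        high += xl_hi * yl_hi;                     /* bit 32 and up of xl*yl */

        return ((uint64_t)high << 32) + low;       /* carries fold in here */
    }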
SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/strchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(strchr) - and r1, r1, #0xff -1: ldrb r2, [r0], #1 - teq r2, r1 - teqne r2, #0 - bne 1b - teq r2, r1 - movne r0, #0 - subeq r0, r0, #1 - ret lr -ENDPROC(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S deleted file mode 100644 index 5e87247d1e8bf46eabe27015d5921df2a8d0f94b..0000000000000000000000000000000000000000 --- a/arch/arm/lib/strrchr.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/strrchr.S - * - * Copyright (C) 1995-2000 Russell King - * - * ASM optimised string functions - */ -#include -#include - - .text - .align 5 -ENTRY(strrchr) - mov r3, #0 -1: ldrb r2, [r0], #1 - teq r2, r1 - subeq r3, r0, #1 - teq r2, #0 - bne 1b - mov r0, r3 - ret lr -ENDPROC(strrchr) diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S deleted file mode 100644 index 4ebecc67e6e044ab6fc38fb254314a2cd0332c83..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testchangebit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testchangebit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_change_bit, eor, str diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S deleted file mode 100644 index 009afa0f5b4a73fcafac5b2736aa90258fe1f77d..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testclearbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testclearbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_clear_bit, bicne, strne diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S deleted file mode 100644 index f3192e55acc87fede3a34a50d7ca6d446b049b89..0000000000000000000000000000000000000000 --- a/arch/arm/lib/testsetbit.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/testsetbit.S - * - * Copyright (C) 1995-1996 Russell King - */ -#include -#include -#include "bitops.h" - .text - -testop _test_and_set_bit, orreq, streq diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S deleted file mode 100644 index 679e16a210ae78cff87c0718ee86b5ae5fbd558d..0000000000000000000000000000000000000000 --- a/arch/arm/lib/ucmpdi2.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ucmpdi2.S - * - * Author: Nicolas Pitre - * Created: Oct 19, 2005 - * Copyright: Monta Vista Software, Inc. 
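strrchr above never scans backwards: a single forward pass keeps overwriting r3 with the position of the most recent match, so the last occurrence wins, and matching the terminating NUL itself is allowed, as C requires. The same strategy in C:

    #include <stddef.h>

    static const char *strrchr_ref(const char *s, int c)
    {
        const char *last = NULL;

        do {
            if (*s == (char)c)
                last = s;          /* remember the most recent match */
        } while (*s++);
        return last;
    }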
- */ - -#include -#include - -#ifdef __ARMEB__ -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#else -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#endif - -ENTRY(__ucmpdi2) - - cmp xh, yh - cmpeq xl, yl - movlo r0, #0 - moveq r0, #1 - movhi r0, #2 - ret lr - -ENDPROC(__ucmpdi2) - -#ifdef CONFIG_AEABI - -ENTRY(__aeabi_ulcmp) - - cmp xh, yh - cmpeq xl, yl - movlo r0, #-1 - moveq r0, #0 - movhi r0, #1 - ret lr - -ENDPROC(__aeabi_ulcmp) - -#endif - diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S deleted file mode 100644 index 2591cba61937b9cc4cbd24c6289dd3d1937b3137..0000000000000000000000000000000000000000 --- a/arch/arm/mach-at91/pm_suspend.S +++ /dev/null @@ -1,523 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mach-at91/pm_slow_clock.S - * - * Copyright (C) 2006 Savin Zlobec - * - * AT91SAM9 support: - * Copyright (C) 2007 Anti Sullin - */ -#include -#include -#include "pm.h" -#include "pm_data-offsets.h" - -#define SRAMC_SELF_FRESH_ACTIVE 0x01 -#define SRAMC_SELF_FRESH_EXIT 0x00 - -pmc .req r0 -tmp1 .req r4 -tmp2 .req r5 - -/* - * Wait until master clock is ready (after switching master clock source) - */ - .macro wait_mckrdy -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MCKRDY - beq 1b - .endm - -/* - * Wait until master oscillator has stabilized. - */ - .macro wait_moscrdy -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCS - beq 1b - .endm - -/* - * Wait for main oscillator selection is done - */ - .macro wait_moscsels -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCSELS - beq 1b - .endm - -/* - * Put the processor to enter the idle state - */ - .macro at91_cpu_idle - -#if defined(CONFIG_CPU_V7) - mov tmp1, #AT91_PMC_PCK - str tmp1, [pmc, #AT91_PMC_SCDR] - - dsb - - wfi @ Wait For Interrupt -#else - mcr p15, 0, tmp1, c7, c0, 4 -#endif - - .endm - - .text - - .arm - -/* - * void at91_suspend_sram_fn(struct at91_pm_data*) - * @input param: - * @r0: base address of struct at91_pm_data - */ -/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */ - .align 3 -ENTRY(at91_pm_suspend_in_sram) - /* Save registers on stack */ - stmfd sp!, {r4 - r12, lr} - - /* Drain write buffer */ - mov tmp1, #0 - mcr p15, 0, tmp1, c7, c10, 4 - - ldr tmp1, [r0, #PM_DATA_PMC] - str tmp1, .pmc_base - ldr tmp1, [r0, #PM_DATA_RAMC0] - str tmp1, .sramc_base - ldr tmp1, [r0, #PM_DATA_RAMC1] - str tmp1, .sramc1_base - ldr tmp1, [r0, #PM_DATA_MEMCTRL] - str tmp1, .memtype - ldr tmp1, [r0, #PM_DATA_MODE] - str tmp1, .pm_mode - /* Both ldrne below are here to preload their address in the TLB */ - ldr tmp1, [r0, #PM_DATA_SHDWC] - str tmp1, .shdwc - cmp tmp1, #0 - ldrne tmp2, [tmp1, #0] - ldr tmp1, [r0, #PM_DATA_SFRBU] - str tmp1, .sfr - cmp tmp1, #0 - ldrne tmp2, [tmp1, #0x10] - - /* Active the self-refresh mode */ - mov r0, #SRAMC_SELF_FRESH_ACTIVE - bl at91_sramc_self_refresh - - ldr r0, .pm_mode - cmp r0, #AT91_PM_STANDBY - beq standby - cmp r0, #AT91_PM_BACKUP - beq backup_mode - - bl at91_ulp_mode - b exit_suspend - -standby: - /* Wait for interrupt */ - ldr pmc, .pmc_base - at91_cpu_idle - b exit_suspend - -backup_mode: - bl at91_backup_mode - b exit_suspend - -exit_suspend: - /* Exit the self-refresh mode */ - mov r0, #SRAMC_SELF_FRESH_EXIT - bl at91_sramc_self_refresh - - /* Restore registers, and return */ - ldmfd sp!, {r4 - r12, pc} -ENDPROC(at91_pm_suspend_in_sram) - -ENTRY(at91_backup_mode) - /* Switch the master clock source to slow clock. 
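__ucmpdi2's cmp/cmpeq pair compares the high words and consults the low words only on a tie, returning 0, 1 or 2 for less, equal, greater (the AEABI variant maps the same logic onto -1, 0, 1). In C:

    #include <stdint.h>

    static int ucmpdi2_ref(uint64_t a, uint64_t b)
    {
        uint32_t ah = (uint32_t)(a >> 32), bh = (uint32_t)(b >> 32);
        uint32_t al = (uint32_t)a, bl = (uint32_t)b;

        if (ah != bh)
            return ah < bh ? 0 : 2;    /* decided by the high words */
        if (al != bl)
            return al < bl ? 0 : 2;    /* tie-break on the low words */
        return 1;
    }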
*/ - ldr pmc, .pmc_base - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /*BUMEN*/ - ldr r0, .sfr - mov tmp1, #0x1 - str tmp1, [r0, #0x10] - - /* Shutdown */ - ldr r0, .shdwc - mov tmp1, #0xA5000000 - add tmp1, tmp1, #0x1 - str tmp1, [r0, #0] -ENDPROC(at91_backup_mode) - -.macro at91_pm_ulp0_mode - ldr pmc, .pmc_base - - /* Turn off the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCEN - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Save RC oscillator state */ - ldr tmp1, [pmc, #AT91_PMC_SR] - str tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 1f - - /* Turn off RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC disabled done */ -2: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - bne 2b - - /* Wait for interrupt */ -1: at91_cpu_idle - - /* Restore RC oscillator state */ - ldr tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - beq 4f - - /* Turn on RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC stabilization */ -3: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - beq 3b - - /* Turn on the crystal oscillator */ -4: ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCEN - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscrdy -.endm - -/** - * Note: This procedure only applies on the platform which uses - * the external crystal oscillator as a main clock source. - */ -.macro at91_pm_ulp1_mode - ldr pmc, .pmc_base - - /* Save RC oscillator state and check if it is enabled. 
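wait_mckrdy, wait_moscrdy and wait_moscsels are all one pattern: spin on the PMC status register until the relevant ready bit is set. A sketch of that poll, with the offset and bit values as placeholders rather than the real AT91 definitions:

    #include <stdint.h>

    #define PMC_SR      0x68u          /* placeholder status offset */
    #define PMC_MCKRDY  (1u << 3)      /* placeholder ready bit */

    static void wait_ready(const volatile uint32_t *pmc_base, uint32_t bit)
    {
        while (!(pmc_base[PMC_SR / 4] & bit))
            ;                           /* busy-wait, as the .macro does */
    }

    /* e.g. wait_ready(pmc, PMC_MCKRDY); */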
*/ - ldr tmp1, [pmc, #AT91_PMC_SR] - str tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 2f - - /* Enable RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait main RC stabilization */ -1: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - beq 1b - - /* Switch the main clock source to 12-MHz RC oscillator */ -2: ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCSEL - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscsels - - /* Disable the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Switch the master clock source to main clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - orr tmp1, tmp1, #AT91_PMC_CSS_MAIN - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Enter the ULP1 mode by set WAITMODE bit in CKGR_MOR */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_WAITMODE - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Quirk for SAM9X60's PMC */ - nop - nop - - wait_mckrdy - - /* Enable the crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscrdy - - /* Switch the master clock source to slow clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Switch main clock source to crystal oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - orr tmp1, tmp1, #AT91_PMC_MOSCSEL - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - wait_moscsels - - /* Switch the master clock source to main clock */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - bic tmp1, tmp1, #AT91_PMC_CSS - orr tmp1, tmp1, #AT91_PMC_CSS_MAIN - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - /* Restore RC oscillator state */ - ldr tmp1, .saved_osc_status - tst tmp1, #AT91_PMC_MOSCRCS - bne 3f - - /* Disable RC oscillator */ - ldr tmp1, [pmc, #AT91_CKGR_MOR] - bic tmp1, tmp1, #AT91_PMC_MOSCRCEN - bic tmp1, tmp1, #AT91_PMC_KEY_MASK - orr tmp1, tmp1, #AT91_PMC_KEY - str tmp1, [pmc, #AT91_CKGR_MOR] - - /* Wait RC oscillator disable done */ -4: ldr tmp1, [pmc, #AT91_PMC_SR] - tst tmp1, #AT91_PMC_MOSCRCS - bne 4b - -3: -.endm - -ENTRY(at91_ulp_mode) - ldr pmc, .pmc_base - - /* Save Master clock setting */ - ldr tmp1, [pmc, #AT91_PMC_MCKR] - str tmp1, .saved_mckr - - /* - * Set the Master clock source to slow clock - */ - bic tmp1, tmp1, #AT91_PMC_CSS - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - ldr r0, .pm_mode - cmp r0, #AT91_PM_ULP1 - beq ulp1_mode - - at91_pm_ulp0_mode - b ulp_exit - -ulp1_mode: - at91_pm_ulp1_mode - b ulp_exit - -ulp_exit: - ldr pmc, .pmc_base - - /* - * Restore master clock setting - */ - ldr tmp1, .saved_mckr - str tmp1, [pmc, #AT91_PMC_MCKR] - - wait_mckrdy - - mov pc, lr -ENDPROC(at91_ulp_mode) - -/* - * void at91_sramc_self_refresh(unsigned int is_active) - * - * @input param: - * @r0: 1 - active self-refresh mode - * 0 - exit self-refresh mode - * register usage: - * @r1: memory type - * @r2: base address of the sram controller - */ - 
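The routine below dispatches on the saved memory-controller type and toggles self-refresh accordingly. As a rough C sketch of its DDR branch (a simplification: the assembly additionally rewrites MDR to force LPDDR1 parts into DDR2 mode while in self-refresh, and repeats the whole sequence for an optional second controller; register names are the ones from the at91 headers used above):

	static u32 saved_lpr;

	static void ddrc_self_refresh(void __iomem *ddrc, bool enter)
	{
		if (enter) {
			u32 lpr = readl(ddrc + AT91_DDRSDRC_LPR);

			saved_lpr = lpr;		/* restored on exit */
			lpr &= ~AT91_DDRSDRC_LPCB;
			lpr |= AT91_DDRSDRC_LPCB_SELF_REFRESH;
			writel(lpr, ddrc + AT91_DDRSDRC_LPR);
		} else {
			writel(saved_lpr, ddrc + AT91_DDRSDRC_LPR);
		}
	}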
-ENTRY(at91_sramc_self_refresh) - ldr r1, .memtype - ldr r2, .sramc_base - - cmp r1, #AT91_MEMCTRL_MC - bne ddrc_sf - - /* - * at91rm9200 Memory controller - */ - - /* - * For exiting the self-refresh mode, do nothing, - * automatically exit the self-refresh mode. - */ - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq exit_sramc_sf - - /* Active SDRAM self-refresh mode */ - mov r3, #1 - str r3, [r2, #AT91_MC_SDRAMC_SRR] - b exit_sramc_sf - -ddrc_sf: - cmp r1, #AT91_MEMCTRL_DDRSDR - bne sdramc_sf - - /* - * DDR Memory controller - */ - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq ddrc_exit_sf - - /* LPDDR1 --> force DDR2 mode during self-refresh */ - ldr r3, [r2, #AT91_DDRSDRC_MDR] - str r3, .saved_sam9_mdr - bic r3, r3, #~AT91_DDRSDRC_MD - cmp r3, #AT91_DDRSDRC_MD_LOW_POWER_DDR - ldreq r3, [r2, #AT91_DDRSDRC_MDR] - biceq r3, r3, #AT91_DDRSDRC_MD - orreq r3, r3, #AT91_DDRSDRC_MD_DDR2 - streq r3, [r2, #AT91_DDRSDRC_MDR] - - /* Active DDRC self-refresh mode */ - ldr r3, [r2, #AT91_DDRSDRC_LPR] - str r3, .saved_sam9_lpr - bic r3, r3, #AT91_DDRSDRC_LPCB - orr r3, r3, #AT91_DDRSDRC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_DDRSDRC_LPR] - - /* If using the 2nd ddr controller */ - ldr r2, .sramc1_base - cmp r2, #0 - beq no_2nd_ddrc - - ldr r3, [r2, #AT91_DDRSDRC_MDR] - str r3, .saved_sam9_mdr1 - bic r3, r3, #~AT91_DDRSDRC_MD - cmp r3, #AT91_DDRSDRC_MD_LOW_POWER_DDR - ldreq r3, [r2, #AT91_DDRSDRC_MDR] - biceq r3, r3, #AT91_DDRSDRC_MD - orreq r3, r3, #AT91_DDRSDRC_MD_DDR2 - streq r3, [r2, #AT91_DDRSDRC_MDR] - - /* Active DDRC self-refresh mode */ - ldr r3, [r2, #AT91_DDRSDRC_LPR] - str r3, .saved_sam9_lpr1 - bic r3, r3, #AT91_DDRSDRC_LPCB - orr r3, r3, #AT91_DDRSDRC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_DDRSDRC_LPR] - -no_2nd_ddrc: - b exit_sramc_sf - -ddrc_exit_sf: - /* Restore MDR in case of LPDDR1 */ - ldr r3, .saved_sam9_mdr - str r3, [r2, #AT91_DDRSDRC_MDR] - /* Restore LPR on AT91 with DDRAM */ - ldr r3, .saved_sam9_lpr - str r3, [r2, #AT91_DDRSDRC_LPR] - - /* If using the 2nd ddr controller */ - ldr r2, .sramc1_base - cmp r2, #0 - ldrne r3, .saved_sam9_mdr1 - strne r3, [r2, #AT91_DDRSDRC_MDR] - ldrne r3, .saved_sam9_lpr1 - strne r3, [r2, #AT91_DDRSDRC_LPR] - - b exit_sramc_sf - - /* - * SDRAMC Memory controller - */ -sdramc_sf: - tst r0, #SRAMC_SELF_FRESH_ACTIVE - beq sdramc_exit_sf - - /* Active SDRAMC self-refresh mode */ - ldr r3, [r2, #AT91_SDRAMC_LPR] - str r3, .saved_sam9_lpr - bic r3, r3, #AT91_SDRAMC_LPCB - orr r3, r3, #AT91_SDRAMC_LPCB_SELF_REFRESH - str r3, [r2, #AT91_SDRAMC_LPR] - -sdramc_exit_sf: - ldr r3, .saved_sam9_lpr - str r3, [r2, #AT91_SDRAMC_LPR] - -exit_sramc_sf: - mov pc, lr -ENDPROC(at91_sramc_self_refresh) - -.pmc_base: - .word 0 -.sramc_base: - .word 0 -.sramc1_base: - .word 0 -.shdwc: - .word 0 -.sfr: - .word 0 -.memtype: - .word 0 -.pm_mode: - .word 0 -.saved_mckr: - .word 0 -.saved_sam9_lpr: - .word 0 -.saved_sam9_lpr1: - .word 0 -.saved_sam9_mdr: - .word 0 -.saved_sam9_mdr1: - .word 0 -.saved_osc_status: - .word 0 - -ENTRY(at91_pm_suspend_in_sram_sz) - .word .-at91_pm_suspend_in_sram diff --git a/arch/arm/mach-berlin/headsmp.S b/arch/arm/mach-berlin/headsmp.S deleted file mode 100644 index 3057885d97728f6896409f62ddb93cb64378962d..0000000000000000000000000000000000000000 --- a/arch/arm/mach-berlin/headsmp.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2014 Marvell Technology Group Ltd. 
- * - * Antoine Ténart - */ - -#include -#include -#include - -/* - * If the following instruction is set in the reset exception vector, CPUs - * will fetch the value of the software reset address vector when being - * reset. - */ -.global boot_inst -boot_inst: - ldr pc, [pc, #140] - - .align diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S deleted file mode 100644 index 71262dcdbca32aea22714945f43bc6671c71c8a9..0000000000000000000000000000000000000000 --- a/arch/arm/mach-davinci/sleep.S +++ /dev/null @@ -1,216 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * (C) Copyright 2009, Texas Instruments, Inc. http://www.ti.com/ - */ - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - -#include -#include -#include "psc.h" -#include "ddr2.h" - -#include "clock.h" - -/* Arbitrary, hardware currently does not update PHYRDY correctly */ -#define PHYRDY_CYCLES 0x1000 - -/* Assume 25 MHz speed for the cycle conversions since PLLs are bypassed */ -#define PLL_BYPASS_CYCLES (PLL_BYPASS_TIME * 25) -#define PLL_RESET_CYCLES (PLL_RESET_TIME * 25) -#define PLL_LOCK_CYCLES (PLL_LOCK_TIME * 25) - -#define DEEPSLEEP_SLEEPENABLE_BIT BIT(31) - - .text - .arch armv5te -/* - * Move DaVinci into deep sleep state - * - * Note: This code is copied to internal SRAM by PM code. When the DaVinci - * wakes up it continues execution at the point it went to sleep. - * Register Usage: - * r0: contains virtual base for DDR2 controller - * r1: contains virtual base for DDR2 Power and Sleep controller (PSC) - * r2: contains PSC number for DDR2 - * r3: contains virtual base DDR2 PLL controller - * r4: contains virtual address of the DEEPSLEEP register - */ -ENTRY(davinci_cpu_suspend) - stmfd sp!, {r0-r12, lr} @ save registers on stack - - ldr ip, CACHE_FLUSH - blx ip - - ldmia r0, {r0-r4} - - /* - * Switch DDR to self-refresh mode. 
- */ - - /* calculate SDRCR address */ - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_SRPD_BIT - orr ip, ip, #DDR2_LPMODEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - orr ip, ip, #DDR2_MCLKSTOPEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - mov ip, #PHYRDY_CYCLES -1: subs ip, ip, #0x1 - bne 1b - - /* Disable DDR2 LPSC */ - mov r7, r0 - mov r0, #0x2 - bl davinci_ddr_psc_config - mov r0, r7 - - /* Disable clock to DDR PHY */ - ldr ip, [r3, #PLLDIV1] - bic ip, ip, #PLLDIV_EN - str ip, [r3, #PLLDIV1] - - /* Put the DDR PLL in bypass and power down */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLENSRC - bic ip, ip, #PLLCTL_PLLEN - str ip, [r3, #PLLCTL] - - /* Wait for PLL to switch to bypass */ - mov ip, #PLL_BYPASS_CYCLES -2: subs ip, ip, #0x1 - bne 2b - - /* Power down the PLL */ - ldr ip, [r3, #PLLCTL] - orr ip, ip, #PLLCTL_PLLPWRDN - str ip, [r3, #PLLCTL] - - /* Go to deep sleep */ - ldr ip, [r4] - orr ip, ip, #DEEPSLEEP_SLEEPENABLE_BIT - /* System goes to sleep beyond after this instruction */ - str ip, [r4] - - /* Wake up from sleep */ - - /* Clear sleep enable */ - ldr ip, [r4] - bic ip, ip, #DEEPSLEEP_SLEEPENABLE_BIT - str ip, [r4] - - /* initialize the DDR PLL controller */ - - /* Put PLL in reset */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLRST - str ip, [r3, #PLLCTL] - - /* Clear PLL power down */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLPWRDN - str ip, [r3, #PLLCTL] - - mov ip, #PLL_RESET_CYCLES -3: subs ip, ip, #0x1 - bne 3b - - /* Bring PLL out of reset */ - ldr ip, [r3, #PLLCTL] - orr ip, ip, #PLLCTL_PLLRST - str ip, [r3, #PLLCTL] - - /* Wait for PLL to lock (assume prediv = 1, 25MHz OSCIN) */ - mov ip, #PLL_LOCK_CYCLES -4: subs ip, ip, #0x1 - bne 4b - - /* Remove PLL from bypass mode */ - ldr ip, [r3, #PLLCTL] - bic ip, ip, #PLLCTL_PLLENSRC - orr ip, ip, #PLLCTL_PLLEN - str ip, [r3, #PLLCTL] - - /* Start 2x clock to DDR2 */ - - ldr ip, [r3, #PLLDIV1] - orr ip, ip, #PLLDIV_EN - str ip, [r3, #PLLDIV1] - - /* Enable VCLK */ - - /* Enable DDR2 LPSC */ - mov r7, r0 - mov r0, #0x3 - bl davinci_ddr_psc_config - mov r0, r7 - - /* clear MCLKSTOPEN */ - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_MCLKSTOPEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - ldr ip, [r0, #DDR2_SDRCR_OFFSET] - bic ip, ip, #DDR2_LPMODEN_BIT - str ip, [r0, #DDR2_SDRCR_OFFSET] - - /* Restore registers and return */ - ldmfd sp!, {r0-r12, pc} - -ENDPROC(davinci_cpu_suspend) - -/* - * Disables or Enables DDR2 LPSC - * Register Usage: - * r0: Enable or Disable LPSC r0 = 0x3 => Enable, r0 = 0x2 => Disable LPSC - * r1: contains virtual base for DDR2 Power and Sleep controller (PSC) - * r2: contains PSC number for DDR2 - */ -ENTRY(davinci_ddr_psc_config) - /* Set next state in mdctl for DDR2 */ - mov r6, #MDCTL - add r6, r6, r2, lsl #2 - ldr ip, [r1, r6] - bic ip, ip, #MDSTAT_STATE_MASK - orr ip, ip, r0 - str ip, [r1, r6] - - /* Enable the Power Domain Transition Command */ - ldr ip, [r1, #PTCMD] - orr ip, ip, #0x1 - str ip, [r1, #PTCMD] - - /* Check for Transition Complete (PTSTAT) */ -ptstat_done: - ldr ip, [r1, #PTSTAT] - and ip, ip, #0x1 - cmp ip, #0x0 - bne ptstat_done - - /* Check for DDR2 clock disable completion; */ - mov r6, #MDSTAT - add r6, r6, r2, lsl #2 -ddr2clk_stop_done: - ldr ip, [r1, r6] - and ip, ip, #MDSTAT_STATE_MASK - cmp ip, r0 - bne ddr2clk_stop_done - - ret lr -ENDPROC(davinci_ddr_psc_config) - -CACHE_FLUSH: -#ifdef CONFIG_CPU_V6 - .word v6_flush_kern_cache_all -#else - .word arm926_flush_kern_cache_all -#endif - 
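davinci_cpu_suspend runs from internal SRAM, so the PM code needs the routine's byte length in order to copy it there; the word emitted below exports exactly that. A minimal sketch of the caller side, assuming sram_vbase is an executable SRAM mapping (fncpy() is the standard ARM helper for copying position-independent code, and it requires the 8-byte-aligned destination also noted in the at91 code above):

	#include <asm/fncpy.h>

	extern void davinci_cpu_suspend(unsigned long *args);
	extern unsigned long davinci_cpu_suspend_sz;

	typedef void (*sram_suspend_fn)(unsigned long *args);

	static sram_suspend_fn install_suspend_code(void *sram_vbase)
	{
		/* Copy the routine into SRAM, get back a callable pointer. */
		return fncpy(sram_vbase, &davinci_cpu_suspend,
			     davinci_cpu_suspend_sz);
	}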
-ENTRY(davinci_cpu_suspend_sz) - .word . - davinci_cpu_suspend -ENDPROC(davinci_cpu_suspend_sz) diff --git a/arch/arm/mach-ebsa110/include/mach/entry-macro.S b/arch/arm/mach-ebsa110/include/mach/entry-macro.S deleted file mode 100644 index 14b110de78a9caf133dce9b9f5c99f29c06385c5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-ebsa110/include/mach/entry-macro.S +++ /dev/null @@ -1,33 +0,0 @@ -/* - * arch/arm/mach-ebsa110/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for ebsa110 platform. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - - - -#define IRQ_STAT 0xff000000 /* read */ - - .macro get_irqnr_preamble, base, tmp - mov \base, #IRQ_STAT - .endm - - .macro get_irqnr_and_base, irqnr, stat, base, tmp - ldrb \stat, [\base] @ get interrupts - mov \irqnr, #0 - tst \stat, #15 - addeq \irqnr, \irqnr, #4 - moveq \stat, \stat, lsr #4 - tst \stat, #3 - addeq \irqnr, \irqnr, #2 - moveq \stat, \stat, lsr #2 - tst \stat, #1 - addeq \irqnr, \irqnr, #1 - moveq \stat, \stat, lsr #1 - tst \stat, #1 @ bit 0 should be set - .endm - diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S deleted file mode 100644 index fb2dbf76f09ee58df6ba0550f03f74d75817f490..0000000000000000000000000000000000000000 --- a/arch/arm/mach-ep93xx/crunch-bits.S +++ /dev/null @@ -1,310 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/kernel/crunch-bits.S - * Cirrus MaverickCrunch context switching and handling - * - * Copyright (C) 2006 Lennert Buytenhek - * - * Shamelessly stolen from the iWMMXt code by Nicolas Pitre, which is - * Copyright (c) 2003-2004, MontaVista Software, Inc. - */ - -#include -#include -#include -#include -#include -#include - -/* - * We can't use hex constants here due to a bug in gas. - */ -#define CRUNCH_MVDX0 0 -#define CRUNCH_MVDX1 8 -#define CRUNCH_MVDX2 16 -#define CRUNCH_MVDX3 24 -#define CRUNCH_MVDX4 32 -#define CRUNCH_MVDX5 40 -#define CRUNCH_MVDX6 48 -#define CRUNCH_MVDX7 56 -#define CRUNCH_MVDX8 64 -#define CRUNCH_MVDX9 72 -#define CRUNCH_MVDX10 80 -#define CRUNCH_MVDX11 88 -#define CRUNCH_MVDX12 96 -#define CRUNCH_MVDX13 104 -#define CRUNCH_MVDX14 112 -#define CRUNCH_MVDX15 120 -#define CRUNCH_MVAX0L 128 -#define CRUNCH_MVAX0M 132 -#define CRUNCH_MVAX0H 136 -#define CRUNCH_MVAX1L 140 -#define CRUNCH_MVAX1M 144 -#define CRUNCH_MVAX1H 148 -#define CRUNCH_MVAX2L 152 -#define CRUNCH_MVAX2M 156 -#define CRUNCH_MVAX2H 160 -#define CRUNCH_MVAX3L 164 -#define CRUNCH_MVAX3M 168 -#define CRUNCH_MVAX3H 172 -#define CRUNCH_DSPSC 176 - -#define CRUNCH_SIZE 184 - - .text - -/* - * Lazy switching of crunch coprocessor context - * - * r10 = struct thread_info pointer - * r9 = ret_from_exception - * lr = undefined instr exit - * - * called from prefetch exception handler with interrupts enabled - */ -ENTRY(crunch_task_enable) - inc_preempt_count r10, r3 - - ldr r8, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r1, [r8, #0x80] - tst r1, #0x00800000 @ access to crunch enabled? 
- bne 2f @ if so no business here - mov r3, #0xaa @ unlock syscon swlock - str r3, [r8, #0xc0] - orr r1, r1, #0x00800000 @ enable access to crunch - str r1, [r8, #0x80] - - ldr r3, =crunch_owner - add r0, r10, #TI_CRUNCH_STATE @ get task crunch save area - ldr r2, [sp, #60] @ current task pc value - ldr r1, [r3] @ get current crunch owner - str r0, [r3] @ this task now owns crunch - sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] - - ldr r2, [r8, #0x80] - mov r2, r2 @ flush out enable (@@@) - - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq crunch_load @ no owner, skip save - -crunch_save: - cfstr64 mvdx0, [r1, #CRUNCH_MVDX0] @ save 64b registers - cfstr64 mvdx1, [r1, #CRUNCH_MVDX1] - cfstr64 mvdx2, [r1, #CRUNCH_MVDX2] - cfstr64 mvdx3, [r1, #CRUNCH_MVDX3] - cfstr64 mvdx4, [r1, #CRUNCH_MVDX4] - cfstr64 mvdx5, [r1, #CRUNCH_MVDX5] - cfstr64 mvdx6, [r1, #CRUNCH_MVDX6] - cfstr64 mvdx7, [r1, #CRUNCH_MVDX7] - cfstr64 mvdx8, [r1, #CRUNCH_MVDX8] - cfstr64 mvdx9, [r1, #CRUNCH_MVDX9] - cfstr64 mvdx10, [r1, #CRUNCH_MVDX10] - cfstr64 mvdx11, [r1, #CRUNCH_MVDX11] - cfstr64 mvdx12, [r1, #CRUNCH_MVDX12] - cfstr64 mvdx13, [r1, #CRUNCH_MVDX13] - cfstr64 mvdx14, [r1, #CRUNCH_MVDX14] - cfstr64 mvdx15, [r1, #CRUNCH_MVDX15] - -#ifdef __ARMEB__ -#error fix me for ARMEB -#endif - - cfmv32al mvfx0, mvax0 @ save 72b accumulators - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0L] - cfmv32am mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0M] - cfmv32ah mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0H] - cfmv32al mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1L] - cfmv32am mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1M] - cfmv32ah mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1H] - cfmv32al mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2L] - cfmv32am mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2M] - cfmv32ah mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2H] - cfmv32al mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3L] - cfmv32am mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3M] - cfmv32ah mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3H] - - cfmv32sc mvdx0, dspsc @ save status word - cfstr64 mvdx0, [r1, #CRUNCH_DSPSC] - - teq r0, #0 @ anything to load? 
- cfldr64eq mvdx0, [r1, #CRUNCH_MVDX0] @ mvdx0 was clobbered - beq 1f - -crunch_load: - cfldr64 mvdx0, [r0, #CRUNCH_DSPSC] @ load status word - cfmvsc32 dspsc, mvdx0 - - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0L] @ load 72b accumulators - cfmval32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0M] - cfmvam32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0H] - cfmvah32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1L] - cfmval32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1M] - cfmvam32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1H] - cfmvah32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2L] - cfmval32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2M] - cfmvam32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2H] - cfmvah32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3L] - cfmval32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3M] - cfmvam32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3H] - cfmvah32 mvax3, mvfx0 - - cfldr64 mvdx0, [r0, #CRUNCH_MVDX0] @ load 64b registers - cfldr64 mvdx1, [r0, #CRUNCH_MVDX1] - cfldr64 mvdx2, [r0, #CRUNCH_MVDX2] - cfldr64 mvdx3, [r0, #CRUNCH_MVDX3] - cfldr64 mvdx4, [r0, #CRUNCH_MVDX4] - cfldr64 mvdx5, [r0, #CRUNCH_MVDX5] - cfldr64 mvdx6, [r0, #CRUNCH_MVDX6] - cfldr64 mvdx7, [r0, #CRUNCH_MVDX7] - cfldr64 mvdx8, [r0, #CRUNCH_MVDX8] - cfldr64 mvdx9, [r0, #CRUNCH_MVDX9] - cfldr64 mvdx10, [r0, #CRUNCH_MVDX10] - cfldr64 mvdx11, [r0, #CRUNCH_MVDX11] - cfldr64 mvdx12, [r0, #CRUNCH_MVDX12] - cfldr64 mvdx13, [r0, #CRUNCH_MVDX13] - cfldr64 mvdx14, [r0, #CRUNCH_MVDX14] - cfldr64 mvdx15, [r0, #CRUNCH_MVDX15] - -1: -#ifdef CONFIG_PREEMPT_COUNT - get_thread_info r10 -#endif -2: dec_preempt_count r10, r3 - ret lr - -/* - * Back up crunch regs to save area and disable access to them - * (mainly for gdb or sleep mode usage) - * - * r0 = struct thread_info pointer of target task or NULL for any - */ -ENTRY(crunch_task_disable) - stmfd sp!, {r4, r5, lr} - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r4, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r1, [r3] @ get current crunch owner - teq r1, #0 @ any current owner? - beq 1f @ no: quit - teq r0, #0 @ any owner? - teqne r1, r2 @ or specified one? - bne 1f @ no: quit - - ldr r5, [r4, #0x80] @ enable access to crunch - mov r2, #0xaa - str r2, [r4, #0xc0] - orr r5, r5, #0x00800000 - str r5, [r4, #0x80] - - mov r0, #0 @ nothing to load - str r0, [r3] @ no more current owner - ldr r2, [r4, #0x80] @ flush out enable (@@@) - mov r2, r2 - bl crunch_save - - mov r2, #0xaa @ disable access to crunch - str r2, [r4, #0xc0] - bic r5, r5, #0x00800000 - str r5, [r4, #0x80] - ldr r5, [r4, #0x80] @ flush out enable (@@@) - mov r5, r5 - -1: msr cpsr_c, ip @ restore interrupt mode - ldmfd sp!, {r4, r5, pc} - -/* - * Copy crunch state to given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to store crunch state - * - * this is called mainly in the creation of signal stack frames - */ -ENTRY(crunch_task_copy) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... 
- beq 1f - - @ current crunch values are in the task save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r1 - mov r1, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- grab a copy from there - mov r0, #0 @ nothing to load - mov r3, lr @ preserve return address - bl crunch_save - msr cpsr_c, ip @ restore interrupt mode - ret r3 - -/* - * Restore crunch state from given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to get crunch state from - * - * this is used to restore crunch state when unwinding a signal stack frame - */ -ENTRY(crunch_task_restore) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... - beq 1f - - @ this task doesn't own crunch regs -- use its save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- load them directly - mov r0, r1 - mov r1, #0 @ nothing to save - mov r3, lr @ preserve return address - bl crunch_load - msr cpsr_c, ip @ restore interrupt mode - ret r3 diff --git a/arch/arm/mach-exynos/exynos-smc.S b/arch/arm/mach-exynos/exynos-smc.S deleted file mode 100644 index 6da31e6a7acbc74397020a2bf9498f8eb223a3f5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/exynos-smc.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 Samsung Electronics. - * - * Copied from omap-smc.S Copyright (C) 2010 Texas Instruments, Inc. - */ - -#include - -/* - * Function signature: void exynos_smc(u32 cmd, u32 arg1, u32 arg2, u32 arg3) - */ - .arch armv7-a - .arch_extension sec -ENTRY(exynos_smc) - stmfd sp!, {r4-r11, lr} - dsb - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(exynos_smc) diff --git a/arch/arm/mach-exynos/headsmp.S b/arch/arm/mach-exynos/headsmp.S deleted file mode 100644 index 0ac2cb9a735568613c3dc7cdd52b599945e0134c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/headsmp.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Cloned from linux/arch/arm/mach-realview/headsmp.S - * - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include - -#include - -/* - * exynos4 specific entry point for secondary CPUs. This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(exynos4_secondary_startup) -ARM_BE8(setend be) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup -ENDPROC(exynos4_secondary_startup) - - .align 2 -1: .long . - .long exynos_pen_release diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S deleted file mode 100644 index ed93f91853b8cddc3c3e869adeaee1bcb1fe3650..0000000000000000000000000000000000000000 --- a/arch/arm/mach-exynos/sleep.S +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2013 Samsung Electronics Co., Ltd. 
- * http://www.samsung.com - * - * Exynos low-level resume code - */ - -#include -#include -#include -#include "smc.h" - -#define CPU_MASK 0xff0ffff0 -#define CPU_CORTEX_A9 0x410fc090 - - .text - .align - - /* - * sleep magic, to allow the bootloader to check for an valid - * image to resume to. Must be the first word before the - * exynos_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* - * exynos_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(exynos_cpu_resume) -#ifdef CONFIG_CACHE_L2X0 - mrc p15, 0, r0, c0, c0, 0 - ldr r1, =CPU_MASK - and r0, r0, r1 - ldr r1, =CPU_CORTEX_A9 - cmp r0, r1 - bleq l2c310_early_resume -#endif - b cpu_resume -ENDPROC(exynos_cpu_resume) - - .align - .arch armv7-a - .arch_extension sec -ENTRY(exynos_cpu_resume_ns) - mrc p15, 0, r0, c0, c0, 0 - ldr r1, =CPU_MASK - and r0, r0, r1 - ldr r1, =CPU_CORTEX_A9 - cmp r0, r1 - bne skip_cp15 - - adr r0, _cp15_save_power - ldr r1, [r0] - ldr r1, [r0, r1] - adr r0, _cp15_save_diag - ldr r2, [r0] - ldr r2, [r0, r2] - mov r0, #SMC_CMD_C15RESUME - dsb - smc #0 -#ifdef CONFIG_CACHE_L2X0 - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - /* Check that the address has been initialised. */ - ldr r1, [r0, #L2X0_R_PHY_BASE] - teq r1, #0 - beq skip_l2x0 - - /* Check if controller has been enabled. */ - ldr r2, [r1, #L2X0_CTRL] - tst r2, #0x1 - bne skip_l2x0 - - ldr r1, [r0, #L2X0_R_TAG_LATENCY] - ldr r2, [r0, #L2X0_R_DATA_LATENCY] - ldr r3, [r0, #L2X0_R_PREFETCH_CTRL] - mov r0, #SMC_CMD_L2X0SETUP1 - smc #0 - - /* Reload saved regs pointer because smc corrupts registers. */ - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - ldr r1, [r0, #L2X0_R_PWR_CTRL] - ldr r2, [r0, #L2X0_R_AUX_CTRL] - mov r0, #SMC_CMD_L2X0SETUP2 - smc #0 - - mov r0, #SMC_CMD_L2X0INVALL - smc #0 - - mov r1, #1 - mov r0, #SMC_CMD_L2X0CTRL - smc #0 -skip_l2x0: -#endif /* CONFIG_CACHE_L2X0 */ -skip_cp15: - b cpu_resume -ENDPROC(exynos_cpu_resume_ns) - - .align -_cp15_save_power: - .long cp15_save_power - . -_cp15_save_diag: - .long cp15_save_diag - . -#ifdef CONFIG_CACHE_L2X0 -1: .long l2x0_saved_regs - . -#endif /* CONFIG_CACHE_L2X0 */ - - .data - .align 2 - .globl cp15_save_diag -cp15_save_diag: - .long 0 @ cp15 diagnostic - .globl cp15_save_power -cp15_save_power: - .long 0 @ cp15 power control diff --git a/arch/arm/mach-footbridge/include/mach/entry-macro.S b/arch/arm/mach-footbridge/include/mach/entry-macro.S deleted file mode 100644 index dabbd5c54a788f5529d81b2942a974bc66d86018..0000000000000000000000000000000000000000 --- a/arch/arm/mach-footbridge/include/mach/entry-macro.S +++ /dev/null @@ -1,107 +0,0 @@ -/* - * arch/arm/mach-footbridge/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for footbridge-based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ -#include -#include -#include - - .equ dc21285_high, ARMCSR_BASE & 0xff000000 - .equ dc21285_low, ARMCSR_BASE & 0x00ffffff - - .macro get_irqnr_preamble, base, tmp - mov \base, #dc21285_high - .if dc21285_low - orr \base, \base, #dc21285_low - .endif - .endm - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqstat, [\base, #0x180] @ get interrupts - - mov \irqnr, #IRQ_SDRAMPARITY - tst \irqstat, #IRQ_MASK_SDRAMPARITY - bne 1001f - - tst \irqstat, #IRQ_MASK_UART_RX - movne \irqnr, #IRQ_CONRX - bne 1001f - - tst \irqstat, #IRQ_MASK_DMA1 - movne \irqnr, #IRQ_DMA1 - bne 1001f - - tst \irqstat, #IRQ_MASK_DMA2 - movne \irqnr, #IRQ_DMA2 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN0 - movne \irqnr, #IRQ_IN0 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN1 - movne \irqnr, #IRQ_IN1 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN2 - movne \irqnr, #IRQ_IN2 - bne 1001f - - tst \irqstat, #IRQ_MASK_IN3 - movne \irqnr, #IRQ_IN3 - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI - movne \irqnr, #IRQ_PCI - bne 1001f - - tst \irqstat, #IRQ_MASK_DOORBELLHOST - movne \irqnr, #IRQ_DOORBELLHOST - bne 1001f - - tst \irqstat, #IRQ_MASK_I2OINPOST - movne \irqnr, #IRQ_I2OINPOST - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER1 - movne \irqnr, #IRQ_TIMER1 - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER2 - movne \irqnr, #IRQ_TIMER2 - bne 1001f - - tst \irqstat, #IRQ_MASK_TIMER3 - movne \irqnr, #IRQ_TIMER3 - bne 1001f - - tst \irqstat, #IRQ_MASK_UART_TX - movne \irqnr, #IRQ_CONTX - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_ABORT - movne \irqnr, #IRQ_PCI_ABORT - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_SERR - movne \irqnr, #IRQ_PCI_SERR - bne 1001f - - tst \irqstat, #IRQ_MASK_DISCARD_TIMER - movne \irqnr, #IRQ_DISCARD_TIMER - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_DPERR - movne \irqnr, #IRQ_PCI_DPERR - bne 1001f - - tst \irqstat, #IRQ_MASK_PCI_PERR - movne \irqnr, #IRQ_PCI_PERR -1001: - .endm - diff --git a/arch/arm/mach-highbank/smc.S b/arch/arm/mach-highbank/smc.S deleted file mode 100644 index 78b3f19e7f37fe83edb25a723037417c1d2d7bd8..0000000000000000000000000000000000000000 --- a/arch/arm/mach-highbank/smc.S +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copied from omap44xx-smc.S Copyright (C) 2010 Texas Instruments, Inc. - * Copyright 2012 Calxeda, Inc. - */ - -#include - -/* - * This is common routine to manage secure monitor API - * used to modify the PL310 secure registers. - * 'r0' contains the value to be modified and 'r12' contains - * the monitor API number. - * Function signature : void highbank_smc1(u32 fn, u32 arg) - */ - .arch armv7-a - .arch_extension sec -ENTRY(highbank_smc1) - stmfd sp!, {r4-r11, lr} - mov r12, r0 - mov r0, r1 - dsb - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(highbank_smc1) diff --git a/arch/arm/mach-imx/headsmp.S b/arch/arm/mach-imx/headsmp.S deleted file mode 100644 index 766dbdb2ae27b9eb51cca555c8170103d17dde17..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/headsmp.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2011 Freescale Semiconductor, Inc. - * Copyright 2011 Linaro Ltd. - */ - -#include -#include -#include - -diag_reg_offset: - .word g_diag_reg - . 
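The ".word g_diag_reg - ." above stores a self-relative offset rather than an absolute address: a secondary core executes this code from its physical load address before the MMU is on, so a link-time virtual address would be useless. The set_diag_reg macro below recovers the physical address by adding the word's own run-time address back in. The same idiom expressed in C, as a sketch:

	#include <stdint.h>

	/*
	 * anchor points at a word assembled as ".word symbol - .";
	 * adding the stored offset to the word's own address yields
	 * the symbol's address in the current address space.
	 */
	static inline uintptr_t resolve_self_relative(const int32_t *anchor)
	{
		return (uintptr_t)anchor + *anchor;
	}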
- - .macro set_diag_reg - adr r0, diag_reg_offset - ldr r1, [r0] - add r1, r1, r0 @ r1 = physical &g_diag_reg - ldr r0, [r1] - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - .endm - -ENTRY(v7_secondary_startup) -ARM_BE8(setend be) @ go BE8 if entered LE - set_diag_reg - b secondary_startup -ENDPROC(v7_secondary_startup) diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S deleted file mode 100644 index 5bd1ba7ef15b61cb98d1bd2d3af4e85acc5ec632..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/resume-imx6.S +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 Freescale Semiconductor, Inc. - */ - -#include -#include -#include -#include -#include "hardware.h" - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/ssi-fiq.S b/arch/arm/mach-imx/ssi-fiq.S deleted file mode 100644 index 68d7fdea92ad6c3c37284b78778001de5522f290..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/ssi-fiq.S +++ /dev/null @@ -1,144 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2009 Sascha Hauer - */ - -#include -#include - -/* - * r8 = bit 0-15: tx offset, bit 16-31: tx buffer size - * r9 = bit 0-15: rx offset, bit 16-31: rx buffer size - */ - -#define SSI_STX0 0x00 -#define SSI_SRX0 0x08 -#define SSI_SISR 0x14 -#define SSI_SIER 0x18 -#define SSI_SACNT 0x38 - -#define SSI_SACNT_AC97EN (1 << 0) - -#define SSI_SIER_TFE0_EN (1 << 0) -#define SSI_SISR_TFE0 (1 << 0) -#define SSI_SISR_RFF0 (1 << 2) -#define SSI_SIER_RFF0_EN (1 << 2) - - .text - .global imx_ssi_fiq_start - .global imx_ssi_fiq_end - .global imx_ssi_fiq_base - .global imx_ssi_fiq_rx_buffer - .global imx_ssi_fiq_tx_buffer - -/* - * imx_ssi_fiq_start is _intentionally_ not marked as a function symbol - * using ENDPROC(). imx_ssi_fiq_start and imx_ssi_fiq_end are used to - * mark the function body so that it can be copied to the FIQ vector in - * the vectors page. imx_ssi_fiq_start should only be called as the result - * of an FIQ: calling it directly will not work. - */ -imx_ssi_fiq_start: - ldr r12, .L_imx_ssi_fiq_base - - /* TX */ - ldr r13, .L_imx_ssi_fiq_tx_buffer - - /* shall we send? */ - ldr r11, [r12, #SSI_SIER] - tst r11, #SSI_SIER_TFE0_EN - beq 1f - - /* TX FIFO empty? */ - ldr r11, [r12, #SSI_SISR] - tst r11, #SSI_SISR_TFE0 - beq 1f - - mov r10, #0x10000 - sub r10, #1 - and r10, r10, r8 /* r10: current buffer offset */ - - add r13, r13, r10 - - ldrh r11, [r13] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #2] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #4] - strh r11, [r12, #SSI_STX0] - - ldrh r11, [r13, #6] - strh r11, [r12, #SSI_STX0] - - add r10, #8 - lsr r11, r8, #16 /* r11: buffer size */ - cmp r10, r11 - lslgt r8, r11, #16 - addle r8, #8 -1: - /* RX */ - - /* shall we receive? */ - ldr r11, [r12, #SSI_SIER] - tst r11, #SSI_SIER_RFF0_EN - beq 1f - - /* RX FIFO full? 
*/ - ldr r11, [r12, #SSI_SISR] - tst r11, #SSI_SISR_RFF0 - beq 1f - - ldr r13, .L_imx_ssi_fiq_rx_buffer - - mov r10, #0x10000 - sub r10, #1 - and r10, r10, r9 /* r10: current buffer offset */ - - add r13, r13, r10 - - ldr r11, [r12, #SSI_SACNT] - tst r11, #SSI_SACNT_AC97EN - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #2] - - /* dummy read to skip slot 12 */ - ldrne r11, [r12, #SSI_SRX0] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #4] - - ldr r11, [r12, #SSI_SRX0] - strh r11, [r13, #6] - - /* dummy read to skip slot 12 */ - ldrne r11, [r12, #SSI_SRX0] - - add r10, #8 - lsr r11, r9, #16 /* r11: buffer size */ - cmp r10, r11 - lslgt r9, r11, #16 - addle r9, #8 - -1: - @ return from FIQ - subs pc, lr, #4 - - .align -.L_imx_ssi_fiq_base: -imx_ssi_fiq_base: - .word 0x0 -.L_imx_ssi_fiq_rx_buffer: -imx_ssi_fiq_rx_buffer: - .word 0x0 -.L_imx_ssi_fiq_tx_buffer: -imx_ssi_fiq_tx_buffer: - .word 0x0 -.L_imx_ssi_fiq_end: -imx_ssi_fiq_end: - diff --git a/arch/arm/mach-imx/suspend-imx53.S b/arch/arm/mach-imx/suspend-imx53.S deleted file mode 100644 index 41b8aad653634074ea592953dc661b3a8ace10d4..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/suspend-imx53.S +++ /dev/null @@ -1,134 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. - */ -/* - */ - -#include - -#define M4IF_MCR0_OFFSET (0x008C) -#define M4IF_MCR0_FDVFS (0x1 << 11) -#define M4IF_MCR0_FDVACK (0x1 << 27) - - .align 3 - -/* - * ==================== low level suspend ==================== - * - * On entry - * r0: pm_info structure address; - * - * suspend ocram space layout: - * ======================== high address ====================== - * . - * . - * . - * ^ - * ^ - * ^ - * imx53_suspend code - * PM_INFO structure(imx53_suspend_info) - * ======================== low address ======================= - */ - -/* Offsets of members of struct imx53_suspend_info */ -#define SUSPEND_INFO_MX53_M4IF_V_OFFSET 0x0 -#define SUSPEND_INFO_MX53_IOMUXC_V_OFFSET 0x4 -#define SUSPEND_INFO_MX53_IO_COUNT_OFFSET 0x8 -#define SUSPEND_INFO_MX53_IO_STATE_OFFSET 0xc - -ENTRY(imx53_suspend) - stmfd sp!, {r4,r5,r6,r7} - - /* Save pad config */ - ldr r1, [r0, #SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_1 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -1: - ldr r5, [r2], #12 /* IOMUXC register offset */ - ldr r6, [r3, r5] /* current value */ - str r6, [r2], #4 /* save area */ - subs r1, r1, #1 - bne 1b - -skip_pad_conf_1: - /* Set FDVFS bit of M4IF_MCR0 to request DDR to enter self-refresh */ - ldr r1, [r0, #SUSPEND_INFO_MX53_M4IF_V_OFFSET] - ldr r2,[r1, #M4IF_MCR0_OFFSET] - orr r2, r2, #M4IF_MCR0_FDVFS - str r2,[r1, #M4IF_MCR0_OFFSET] - - /* Poll FDVACK bit of M4IF_MCR to wait for DDR to enter self-refresh */ -wait_sr_ack: - ldr r2,[r1, #M4IF_MCR0_OFFSET] - ands r2, r2, #M4IF_MCR0_FDVACK - beq wait_sr_ack - - /* Set pad config */ - ldr r1, [r0, #SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_2 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -2: - ldr r5, [r2], #4 /* IOMUXC register offset */ - ldr r6, [r2], #4 /* clear */ - ldr r7, [r3, r5] - bic r7, r7, r6 - ldr r6, [r2], #8 /* set */ - orr r7, r7, r6 - str r7, [r3, r5] - subs r1, r1, #1 - bne 2b - -skip_pad_conf_2: - /* Zzz, enter stop mode */ - wfi - nop - nop - nop - nop - - /* Restore pad config */ - ldr r1, [r0, 
#SUSPEND_INFO_MX53_IO_COUNT_OFFSET] - cmp r1, #0 - beq skip_pad_conf_3 - - add r2, r0, #SUSPEND_INFO_MX53_IO_STATE_OFFSET - ldr r3, [r0, #SUSPEND_INFO_MX53_IOMUXC_V_OFFSET] - -3: - ldr r5, [r2], #12 /* IOMUXC register offset */ - ldr r6, [r2], #4 /* saved value */ - str r6, [r3, r5] - subs r1, r1, #1 - bne 3b - -skip_pad_conf_3: - /* Clear FDVFS bit of M4IF_MCR0 to request DDR to exit self-refresh */ - ldr r1, [r0, #SUSPEND_INFO_MX53_M4IF_V_OFFSET] - ldr r2,[r1, #M4IF_MCR0_OFFSET] - bic r2, r2, #M4IF_MCR0_FDVFS - str r2,[r1, #M4IF_MCR0_OFFSET] - - /* Poll FDVACK bit of M4IF_MCR to wait for DDR to exit self-refresh */ -wait_ar_ack: - ldr r2,[r1, #M4IF_MCR0_OFFSET] - ands r2, r2, #M4IF_MCR0_FDVACK - bne wait_ar_ack - - /* Restore registers */ - ldmfd sp!, {r4,r5,r6,r7} - mov pc, lr - -ENDPROC(imx53_suspend) - -ENTRY(imx53_suspend_sz) - .word . - imx53_suspend diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S deleted file mode 100644 index e06f946b75b96a9455d34facbf019b16121edb82..0000000000000000000000000000000000000000 --- a/arch/arm/mach-imx/suspend-imx6.S +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2014 Freescale Semiconductor, Inc. - */ - -#include -#include -#include -#include -#include "hardware.h" - -/* - * ==================== low level suspend ==================== - * - * Better to follow below rules to use ARM registers: - * r0: pm_info structure address; - * r1 ~ r4: for saving pm_info members; - * r5 ~ r10: free registers; - * r11: io base address. - * - * suspend ocram space layout: - * ======================== high address ====================== - * . - * . - * . - * ^ - * ^ - * ^ - * imx6_suspend code - * PM_INFO structure(imx6_cpu_pm_info) - * ======================== low address ======================= - */ - -/* - * Below offsets are based on struct imx6_cpu_pm_info - * which defined in arch/arm/mach-imx/pm-imx6q.c, this - * structure contains necessary pm info for low level - * suspend related code. - */ -#define PM_INFO_PBASE_OFFSET 0x0 -#define PM_INFO_RESUME_ADDR_OFFSET 0x4 -#define PM_INFO_DDR_TYPE_OFFSET 0x8 -#define PM_INFO_PM_INFO_SIZE_OFFSET 0xC -#define PM_INFO_MX6Q_MMDC_P_OFFSET 0x10 -#define PM_INFO_MX6Q_MMDC_V_OFFSET 0x14 -#define PM_INFO_MX6Q_SRC_P_OFFSET 0x18 -#define PM_INFO_MX6Q_SRC_V_OFFSET 0x1C -#define PM_INFO_MX6Q_IOMUXC_P_OFFSET 0x20 -#define PM_INFO_MX6Q_IOMUXC_V_OFFSET 0x24 -#define PM_INFO_MX6Q_CCM_P_OFFSET 0x28 -#define PM_INFO_MX6Q_CCM_V_OFFSET 0x2C -#define PM_INFO_MX6Q_GPC_P_OFFSET 0x30 -#define PM_INFO_MX6Q_GPC_V_OFFSET 0x34 -#define PM_INFO_MX6Q_L2_P_OFFSET 0x38 -#define PM_INFO_MX6Q_L2_V_OFFSET 0x3C -#define PM_INFO_MMDC_IO_NUM_OFFSET 0x40 -#define PM_INFO_MMDC_IO_VAL_OFFSET 0x44 - -#define MX6Q_SRC_GPR1 0x20 -#define MX6Q_SRC_GPR2 0x24 -#define MX6Q_MMDC_MAPSR 0x404 -#define MX6Q_MMDC_MPDGCTRL0 0x83c -#define MX6Q_GPC_IMR1 0x08 -#define MX6Q_GPC_IMR2 0x0c -#define MX6Q_GPC_IMR3 0x10 -#define MX6Q_GPC_IMR4 0x14 -#define MX6Q_CCM_CCR 0x0 - - .align 3 - .arm - - .macro sync_l2_cache - - /* sync L2 cache to drain L2's buffers to DRAM. 
*/ -#ifdef CONFIG_CACHE_L2X0 - ldr r11, [r0, #PM_INFO_MX6Q_L2_V_OFFSET] - teq r11, #0 - beq 6f - mov r6, #0x0 - str r6, [r11, #L2X0_CACHE_SYNC] -1: - ldr r6, [r11, #L2X0_CACHE_SYNC] - ands r6, r6, #0x1 - bne 1b -6: -#endif - - .endm - - .macro resume_mmdc - - /* restore MMDC IO */ - cmp r5, #0x0 - ldreq r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldrne r11, [r0, #PM_INFO_MX6Q_IOMUXC_P_OFFSET] - - ldr r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET] - ldr r7, =PM_INFO_MMDC_IO_VAL_OFFSET - add r7, r7, r0 -1: - ldr r8, [r7], #0x4 - ldr r9, [r7], #0x4 - str r9, [r11, r8] - subs r6, r6, #0x1 - bne 1b - - cmp r5, #0x0 - ldreq r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET] - ldrne r11, [r0, #PM_INFO_MX6Q_MMDC_P_OFFSET] - - cmp r3, #IMX_DDR_TYPE_LPDDR2 - bne 4f - - /* reset read FIFO, RST_RD_FIFO */ - ldr r7, =MX6Q_MMDC_MPDGCTRL0 - ldr r6, [r11, r7] - orr r6, r6, #(1 << 31) - str r6, [r11, r7] -2: - ldr r6, [r11, r7] - ands r6, r6, #(1 << 31) - bne 2b - - /* reset FIFO a second time */ - ldr r6, [r11, r7] - orr r6, r6, #(1 << 31) - str r6, [r11, r7] -3: - ldr r6, [r11, r7] - ands r6, r6, #(1 << 31) - bne 3b -4: - /* let DDR out of self-refresh */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - bic r7, r7, #(1 << 21) - str r7, [r11, #MX6Q_MMDC_MAPSR] -5: - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - ands r7, r7, #(1 << 25) - bne 5b - - /* enable DDR auto power saving */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - bic r7, r7, #0x1 - str r7, [r11, #MX6Q_MMDC_MAPSR] - - .endm - -ENTRY(imx6_suspend) - ldr r1, [r0, #PM_INFO_PBASE_OFFSET] - ldr r2, [r0, #PM_INFO_RESUME_ADDR_OFFSET] - ldr r3, [r0, #PM_INFO_DDR_TYPE_OFFSET] - ldr r4, [r0, #PM_INFO_PM_INFO_SIZE_OFFSET] - - /* - * counting the resume address in iram - * to set it in SRC register. - */ - ldr r6, =imx6_suspend - ldr r7, =resume - sub r7, r7, r6 - add r8, r1, r4 - add r9, r8, r7 - - /* - * make sure TLB contain the addr we want, - * as we will access them after MMDC IO floated. - */ - - ldr r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET] - ldr r6, [r11, #0x0] - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - ldr r6, [r11, #0x0] - ldr r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldr r6, [r11, #0x0] - - /* use r11 to store the IO address */ - ldr r11, [r0, #PM_INFO_MX6Q_SRC_V_OFFSET] - /* store physical resume addr and pm_info address. */ - str r9, [r11, #MX6Q_SRC_GPR1] - str r1, [r11, #MX6Q_SRC_GPR2] - - /* need to sync L2 cache before DSM. */ - sync_l2_cache - - ldr r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET] - /* - * put DDR explicitly into self-refresh and - * disable automatic power savings. - */ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - orr r7, r7, #0x1 - str r7, [r11, #MX6Q_MMDC_MAPSR] - - /* make the DDR explicitly enter self-refresh. 
*/ - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - orr r7, r7, #(1 << 21) - str r7, [r11, #MX6Q_MMDC_MAPSR] - -poll_dvfs_set: - ldr r7, [r11, #MX6Q_MMDC_MAPSR] - ands r7, r7, #(1 << 25) - beq poll_dvfs_set - - ldr r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET] - ldr r6, =0x0 - ldr r7, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET] - ldr r8, =PM_INFO_MMDC_IO_VAL_OFFSET - add r8, r8, r0 - /* LPDDR2's last 3 IOs need special setting */ - cmp r3, #IMX_DDR_TYPE_LPDDR2 - subeq r7, r7, #0x3 -set_mmdc_io_lpm: - ldr r9, [r8], #0x8 - str r6, [r11, r9] - subs r7, r7, #0x1 - bne set_mmdc_io_lpm - - cmp r3, #IMX_DDR_TYPE_LPDDR2 - bne set_mmdc_io_lpm_done - ldr r6, =0x1000 - ldr r9, [r8], #0x8 - str r6, [r11, r9] - ldr r9, [r8], #0x8 - str r6, [r11, r9] - ldr r6, =0x80000 - ldr r9, [r8] - str r6, [r11, r9] -set_mmdc_io_lpm_done: - - /* - * mask all GPC interrupts before - * enabling the RBC counters to - * avoid the counter starting too - * early if an interupt is already - * pending. - */ - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - ldr r6, [r11, #MX6Q_GPC_IMR1] - ldr r7, [r11, #MX6Q_GPC_IMR2] - ldr r8, [r11, #MX6Q_GPC_IMR3] - ldr r9, [r11, #MX6Q_GPC_IMR4] - - ldr r10, =0xffffffff - str r10, [r11, #MX6Q_GPC_IMR1] - str r10, [r11, #MX6Q_GPC_IMR2] - str r10, [r11, #MX6Q_GPC_IMR3] - str r10, [r11, #MX6Q_GPC_IMR4] - - /* - * enable the RBC bypass counter here - * to hold off the interrupts. RBC counter - * = 32 (1ms), Minimum RBC delay should be - * 400us for the analog LDOs to power down. - */ - ldr r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET] - ldr r10, [r11, #MX6Q_CCM_CCR] - bic r10, r10, #(0x3f << 21) - orr r10, r10, #(0x20 << 21) - str r10, [r11, #MX6Q_CCM_CCR] - - /* enable the counter. */ - ldr r10, [r11, #MX6Q_CCM_CCR] - orr r10, r10, #(0x1 << 27) - str r10, [r11, #MX6Q_CCM_CCR] - - /* unmask all the GPC interrupts. */ - ldr r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET] - str r6, [r11, #MX6Q_GPC_IMR1] - str r7, [r11, #MX6Q_GPC_IMR2] - str r8, [r11, #MX6Q_GPC_IMR3] - str r9, [r11, #MX6Q_GPC_IMR4] - - /* - * now delay for a short while (3usec) - * ARM is at 1GHz at this point - * so a short loop should be enough. - * this delay is required to ensure that - * the RBC counter can start counting in - * case an interrupt is already pending - * or in case an interrupt arrives just - * as ARM is about to assert DSM_request. - */ - ldr r6, =2000 -rbc_loop: - subs r6, r6, #0x1 - bne rbc_loop - - /* Zzz, enter stop mode */ - wfi - nop - nop - nop - nop - - /* - * run to here means there is pending - * wakeup source, system should auto - * resume, we need to restore MMDC IO first - */ - mov r5, #0x0 - resume_mmdc - - /* return to suspend finish */ - ret lr - -resume: - /* invalidate L1 I-cache first */ - mov r6, #0x0 - mcr p15, 0, r6, c7, c5, 0 - mcr p15, 0, r6, c7, c5, 6 - /* enable the Icache and branch prediction */ - mov r6, #0x1800 - mcr p15, 0, r6, c1, c0, 0 - isb - - /* get physical resume address from pm_info. 
*/ - ldr lr, [r0, #PM_INFO_RESUME_ADDR_OFFSET] - /* clear core0's entry and parameter */ - ldr r11, [r0, #PM_INFO_MX6Q_SRC_P_OFFSET] - mov r7, #0x0 - str r7, [r11, #MX6Q_SRC_GPR1] - str r7, [r11, #MX6Q_SRC_GPR2] - - ldr r3, [r0, #PM_INFO_DDR_TYPE_OFFSET] - mov r5, #0x1 - resume_mmdc - - ret lr -ENDPROC(imx6_suspend) diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S deleted file mode 100644 index 8e6766d4621eb7c6bf53afbd575f2eb5ec6f056f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ /dev/null @@ -1,31 +0,0 @@ -/* - * arch/arm/mach-iop32x/include/mach/entry-macro.S - * - * Low-level IRQ helper macros for IOP32x-based platforms - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - .macro get_irqnr_preamble, base, tmp - mrc p15, 0, \tmp, c15, c1, 0 - orr \tmp, \tmp, #(1 << 6) - mcr p15, 0, \tmp, c15, c1, 0 @ Enable cp6 access - mrc p15, 0, \tmp, c15, c1, 0 - mov \tmp, \tmp - sub pc, pc, #4 @ cp_wait - .endm - - .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC - cmp \irqstat, #0 - clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 - .endm - - .macro arch_ret_to_user, tmp1, tmp2 - mrc p15, 0, \tmp1, c15, c1, 0 - ands \tmp2, \tmp1, #(1 << 6) - bicne \tmp1, \tmp1, #(1 << 6) - mcrne p15, 0, \tmp1, c15, c1, 0 @ Disable cp6 access - .endm diff --git a/arch/arm/mach-keystone/smc.S b/arch/arm/mach-keystone/smc.S deleted file mode 100644 index 21ef75cf537091fa9262d928e22bfdcda4228666..0000000000000000000000000000000000000000 --- a/arch/arm/mach-keystone/smc.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Keystone Secure APIs - * - * Copyright (C) 2013 Texas Instruments, Inc. - * Santosh Shilimkar - */ - -#include - -/** - * u32 keystone_cpu_smc(u32 command, u32 cpu, u32 addr) - * - * Low level CPU monitor API - * @command: Monitor command. - * @cpu: CPU Number - * @addr: Kernel jump address for boot CPU - * - * Return: Non zero value on failure - */ - .arch_extension sec -ENTRY(keystone_cpu_smc) - stmfd sp!, {r4-r11, lr} - smc #0 - ldmfd sp!, {r4-r11, pc} -ENDPROC(keystone_cpu_smc) diff --git a/arch/arm/mach-lpc32xx/suspend.S b/arch/arm/mach-lpc32xx/suspend.S deleted file mode 100644 index 3f0a8282ef6fd2edfb9dd308689ef94350fccd39..0000000000000000000000000000000000000000 --- a/arch/arm/mach-lpc32xx/suspend.S +++ /dev/null @@ -1,150 +0,0 @@ -/* - * arch/arm/mach-lpc32xx/suspend.S - * - * Original authors: Dmitry Chigirev, Vitaly Wool - * Modified by Kevin Wells - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. 
- */ -#include -#include -#include "lpc32xx.h" - -/* Using named register defines makes the code easier to follow */ -#define WORK1_REG r0 -#define WORK2_REG r1 -#define SAVED_HCLK_DIV_REG r2 -#define SAVED_HCLK_PLL_REG r3 -#define SAVED_DRAM_CLKCTRL_REG r4 -#define SAVED_PWR_CTRL_REG r5 -#define CLKPWRBASE_REG r6 -#define EMCBASE_REG r7 - -#define LPC32XX_EMC_STATUS_OFFS 0x04 -#define LPC32XX_EMC_STATUS_BUSY 0x1 -#define LPC32XX_EMC_STATUS_SELF_RFSH 0x4 - -#define LPC32XX_CLKPWR_PWR_CTRL_OFFS 0x44 -#define LPC32XX_CLKPWR_HCLK_DIV_OFFS 0x40 -#define LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS 0x58 - -#define CLKPWR_PCLK_DIV_MASK 0xFFFFFE7F - - .text - -ENTRY(lpc32xx_sys_suspend) - @ Save a copy of the used registers in IRAM, r0 is corrupted - adr r0, tmp_stack_end - stmfd r0!, {r3 - r7, sp, lr} - - @ Load a few common register addresses - adr WORK1_REG, reg_bases - ldr CLKPWRBASE_REG, [WORK1_REG, #0] - ldr EMCBASE_REG, [WORK1_REG, #4] - - ldr SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - orr WORK1_REG, SAVED_PWR_CTRL_REG, #LPC32XX_CLKPWR_SDRAM_SELF_RFSH - - @ Wait for SDRAM busy status to go busy and then idle - @ This guarantees a small windows where DRAM isn't busy -1: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - cmp WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - bne 1b @ Branch while idle -2: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - cmp WORK2_REG, #LPC32XX_EMC_STATUS_BUSY - beq 2b @ Branch until idle - - @ Setup self-refresh with support for manual exit of - @ self-refresh mode - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - orr WORK2_REG, WORK1_REG, #LPC32XX_CLKPWR_UPD_SDRAM_SELF_RFSH - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Wait for self-refresh acknowledge, clocks to the DRAM device - @ will automatically stop on start of self-refresh -3: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - cmp WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - bne 3b @ Branch until self-refresh mode starts - - @ Enter direct-run mode from run mode - bic WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_SELECT_RUN_MODE - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Safe disable of DRAM clock in EMC block, prevents DDR sync - @ issues on restart - ldr SAVED_HCLK_DIV_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - and WORK2_REG, SAVED_HCLK_DIV_REG, #CLKPWR_PCLK_DIV_MASK - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - - @ Save HCLK PLL state and disable HCLK PLL - ldr SAVED_HCLK_PLL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - bic WORK2_REG, SAVED_HCLK_PLL_REG, #LPC32XX_CLKPWR_HCLKPLL_POWER_UP - str WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - - @ Enter stop mode until an enabled event occurs - orr WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_STOP_MODE_CTRL - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - .rept 9 - nop - .endr - - @ Clear stop status - bic WORK1_REG, WORK1_REG, #LPC32XX_CLKPWR_STOP_MODE_CTRL - - @ Restore original HCLK PLL value and wait for PLL lock - str SAVED_HCLK_PLL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] -4: - ldr WORK2_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_HCLKPLL_CTRL_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_CLKPWR_HCLKPLL_PLL_STS - bne 4b - - @ Re-enter run 
mode with self-refresh flag cleared, but no DRAM - @ update yet. DRAM is still in self-refresh - str SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Restore original DRAM clock mode to restore DRAM clocks - str SAVED_HCLK_DIV_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_HCLK_DIV_OFFS] - - @ Clear self-refresh mode - orr WORK1_REG, SAVED_PWR_CTRL_REG,\ - #LPC32XX_CLKPWR_UPD_SDRAM_SELF_RFSH - str WORK1_REG, [CLKPWRBASE_REG, #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - str SAVED_PWR_CTRL_REG, [CLKPWRBASE_REG,\ - #LPC32XX_CLKPWR_PWR_CTRL_OFFS] - - @ Wait for EMC to clear self-refresh mode -5: - ldr WORK2_REG, [EMCBASE_REG, #LPC32XX_EMC_STATUS_OFFS] - and WORK2_REG, WORK2_REG, #LPC32XX_EMC_STATUS_SELF_RFSH - bne 5b @ Branch until self-refresh has exited - - @ restore regs and return - adr r0, tmp_stack - ldmfd r0!, {r3 - r7, sp, pc} - -reg_bases: - .long IO_ADDRESS(LPC32XX_CLK_PM_BASE) - .long IO_ADDRESS(LPC32XX_EMC_BASE) - -tmp_stack: - .long 0, 0, 0, 0, 0, 0, 0 -tmp_stack_end: - -ENTRY(lpc32xx_sys_suspend_sz) - .word . - lpc32xx_sys_suspend diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S deleted file mode 100644 index 2d962fe488210d309f050e7387bc7f1812210d3f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Coherency fabric: low level functions - * - * Copyright (C) 2012 Marvell - * - * Gregory CLEMENT - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file implements the assembly function to add a CPU to the - * coherency fabric. This function is called by each of the secondary - * CPUs during their early boot in an SMP kernel, this why this - * function have to callable from assembly. It can also be called by a - * primary CPU from C code during its boot. - */ - -#include -#define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 -#define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 - -#include -#include - - .text -/* - * Returns the coherency base address in r1 (r0 is untouched), or 0 if - * the coherency fabric is not enabled. - */ -ENTRY(ll_get_coherency_base) - mrc p15, 0, r1, c1, c0, 0 - tst r1, #CR_M @ Check MMU bit enabled - bne 1f - - /* - * MMU is disabled, use the physical address of the coherency - * base address. However, if the coherency fabric isn't mapped - * (i.e its virtual address is zero), it means coherency is - * not enabled, so we return 0. - */ - ldr r1, =coherency_base - cmp r1, #0 - beq 2f - adr r1, 3f - ldr r3, [r1] - ldr r1, [r1, r3] - b 2f -1: - /* - * MMU is enabled, use the virtual address of the coherency - * base address. - */ - ldr r1, =coherency_base - ldr r1, [r1] -2: - ret lr -ENDPROC(ll_get_coherency_base) - -/* - * Returns the coherency CPU mask in r3 (r0 is untouched). This - * coherency CPU mask can be used with the coherency fabric - * configuration and control registers. Note that the mask is already - * endian-swapped as appropriate so that the calling functions do not - * have to care about endianness issues while accessing the coherency - * fabric registers - */ -ENTRY(ll_get_coherency_cpumask) - mrc p15, 0, r3, cr0, cr0, 5 - and r3, r3, #15 - mov r2, #(1 << 24) - lsl r3, r2, r3 -ARM_BE8(rev r3, r3) - ret lr -ENDPROC(ll_get_coherency_cpumask) - -/* - * ll_add_cpu_to_smp_group(), ll_enable_coherency() and - * ll_disable_coherency() use the strex/ldrex instructions while the - * MMU can be disabled. 
The Armada XP SoC has an exclusive monitor - * that tracks transactions to Device and/or SO memory and thanks to - * that, exclusive transactions are functional even when the MMU is - * disabled. - */ - -ENTRY(ll_add_cpu_to_smp_group) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET -1: - ldrex r2, [r0] - orr r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - ret lr -ENDPROC(ll_add_cpu_to_smp_group) - -ENTRY(ll_enable_coherency) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET -1: - ldrex r2, [r0] - orr r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - dsb - mov r0, #0 - ret lr -ENDPROC(ll_enable_coherency) - -ENTRY(ll_disable_coherency) - /* - * As r0 is not modified by ll_get_coherency_base() and - * ll_get_coherency_cpumask(), we use it to temporarly save lr - * and avoid it being modified by the branch and link - * calls. This function is used very early in the secondary - * CPU boot, and no stack is available at this point. - */ - mov r0, lr - bl ll_get_coherency_base - /* Bail out if the coherency is not enabled */ - cmp r1, #0 - reteq r0 - bl ll_get_coherency_cpumask - mov lr, r0 - add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET -1: - ldrex r2, [r0] - bic r2, r2, r3 - strex r1, r2, [r0] - cmp r1, #0 - bne 1b - dsb - ret lr -ENDPROC(ll_disable_coherency) - - .align 2 -3: - .long coherency_phys_base - . diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S deleted file mode 100644 index b093a196e80176d44cc083ee89d51b6fee1318c4..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ /dev/null @@ -1,23 +0,0 @@ -/* - * SMP support: Entry point for secondary CPUs of Marvell EBU - * Cortex-A9 based SOCs (Armada 375 and Armada 38x). - * - * Copyright (C) 2014 Marvell - * - * Gregory CLEMENT - * Thomas Petazzoni - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. 
- */ - -#include - -#include - -ENTRY(mvebu_cortex_a9_secondary_startup) -ARM_BE8(setend be) - bl armada_38x_scu_power_up - b secondary_startup -ENDPROC(mvebu_cortex_a9_secondary_startup) diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S deleted file mode 100644 index 2c4032e368badaa94d5ff60b7a6361b42d7f04dd..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/headsmp.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SMP support: Entry point for secondary CPUs - * - * Copyright (C) 2012 Marvell - * - * Yehuda Yitschak - * Gregory CLEMENT - * Thomas Petazzoni - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - * - * This file implements the assembly entry point for secondary CPUs in - * an SMP kernel. The only thing we need to do is to add the CPU to - * the coherency fabric by writing to 2 registers. Currently the base - * register addresses are hard coded due to the early initialisation - * problems. - */ - -#include -#include - -#include - -/* - * Armada XP specific entry point for secondary CPUs. - * We add the CPU to the coherency fabric and then jump to secondary - * startup - */ -ENTRY(armada_xp_secondary_startup) - ARM_BE8(setend be ) @ go BE8 if entered LE - - bl ll_add_cpu_to_smp_group - - bl ll_enable_coherency - - b secondary_startup - -ENDPROC(armada_xp_secondary_startup) diff --git a/arch/arm/mach-mvebu/pmsu_ll.S b/arch/arm/mach-mvebu/pmsu_ll.S deleted file mode 100644 index 7aae9a25cfeb7cab3285d9c02a5e584d33eabc95..0000000000000000000000000000000000000000 --- a/arch/arm/mach-mvebu/pmsu_ll.S +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2014 Marvell - * - * Thomas Petazzoni - * Gregory Clement - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include - - -ENTRY(armada_38x_scu_power_up) - mrc p15, 4, r1, c15, c0 @ get SCU base address - orr r1, r1, #0x8 @ SCU CPU Power Status Register - mrc p15, 0, r0, cr0, cr0, 5 @ get the CPU ID - and r0, r0, #15 - add r1, r1, r0 - mov r0, #0x0 - strb r0, [r1] @ switch SCU power state to Normal mode - ret lr -ENDPROC(armada_38x_scu_power_up) - -/* - * This is the entry point through which CPUs exiting cpuidle deep - * idle state are going. - */ -ENTRY(armada_370_xp_cpu_resume) -ARM_BE8(setend be ) @ go BE8 if entered LE - /* - * Disable the MMU that might have been enabled in BootROM if - * this code is used in the resume path of a suspend/resume - * cycle. 
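armada_38x_scu_power_up above derives everything from coprocessor reads: the SCU base from CP15 (the Cortex-A9 configuration base register) and the CPU number from MPIDR. A sketch of the single register write it performs, with those two values passed in to keep the C portable:

#include <stdint.h>

/* The Cortex-A9 SCU keeps one power-status byte per CPU starting at
 * base + 0x08; writing 0 selects Normal mode. */
static void scu_power_up(volatile uint8_t *scu_base, unsigned int cpu)
{
	scu_base[0x8 + cpu] = 0x0;	/* strb r0, [r1] */
}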
- */ - mrc p15, 0, r1, c1, c0, 0 - bic r1, #1 - mcr p15, 0, r1, c1, c0, 0 - bl ll_add_cpu_to_smp_group - bl ll_enable_coherency - b cpu_resume -ENDPROC(armada_370_xp_cpu_resume) - -ENTRY(armada_38x_cpu_resume) - /* do we need it for Armada 38x*/ -ARM_BE8(setend be ) @ go BE8 if entered LE - bl v7_invalidate_l1 - bl armada_38x_scu_power_up - b cpu_resume -ENDPROC(armada_38x_cpu_resume) - -.global mvebu_boot_wa_start -.global mvebu_boot_wa_end - -/* The following code will be executed from SRAM */ -ENTRY(mvebu_boot_wa_start) -ARM_BE8(setend be) - adr r0, 1f - ldr r0, [r0] @ load the address of the - @ resume register - ldr r0, [r0] @ load the value in the - @ resume register -ARM_BE8(rev r0, r0) @ the value is stored LE - mov pc, r0 @ jump to this value -/* - * the last word of this piece of code will be filled by the physical - * address of the boot address register just after being copied in SRAM - */ -1: - .long . -mvebu_boot_wa_end: -ENDPROC(mvebu_boot_wa_end) diff --git a/arch/arm/mach-npcm/headsmp.S b/arch/arm/mach-npcm/headsmp.S deleted file mode 100644 index c083fe09a07b123cd347a42db6138faa24fd3853..0000000000000000000000000000000000000000 --- a/arch/arm/mach-npcm/headsmp.S +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Nuvoton Technology corporation. -// Copyright 2018 Google, Inc. - -#include -#include -#include - -/* - * The boot ROM does not start secondary CPUs in SVC mode, so we need to do that - * here. - */ -ENTRY(npcm7xx_secondary_startup) - safe_svcmode_maskall r0 - - b secondary_startup -ENDPROC(npcm7xx_secondary_startup) diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S deleted file mode 100644 index f745a65d3bd7a3239eaabc25a6771c5d91323cf5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ /dev/null @@ -1,274 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mach-omap1/ams-delta-fiq-handler.S - * - * Based on linux/arch/arm/lib/floppydma.S - * Renamed and modified to work with 2.6 kernel by Matt Callow - * Copyright (C) 1995, 1996 Russell King - * Copyright (C) 2004 Pete Trapps - * Copyright (C) 2006 Matt Callow - * Copyright (C) 2010 Janusz Krzysztofik - */ - -#include -#include -#include - -#include -#include - -#include "ams-delta-fiq.h" -#include "board-ams-delta.h" -#include "iomap.h" -#include "soc.h" - -/* - * OMAP1510 GPIO related symbol copied from arch/arm/mach-omap1/gpio15xx.c. - * Unfortunately, it was not placed in a separate header file. 
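mvebu_boot_wa above is a position-independent stub: its trailing word is patched with the physical address of the boot-address register before the stub is copied to SRAM, and that register's contents are little-endian, hence the 'rev' when running BE8. A rough C rendering (names are ours, addresses are 32-bit):

#include <stdint.h>

typedef void (*resume_fn)(void);

static inline uint32_t rev32(uint32_t v)	/* the 'rev' instruction */
{
	return (v >> 24) | ((v >> 8) & 0x0000ff00) |
	       ((v << 8) & 0x00ff0000) | (v << 24);
}

/* 'reg_pa' models the patched word at label 1b. */
static void boot_wa(uint32_t reg_pa, int be8)
{
	volatile uint32_t *reg = (volatile uint32_t *)(uintptr_t)reg_pa;
	uint32_t resume_addr = *reg;			/* load resume register */

	if (be8)
		resume_addr = rev32(resume_addr);	/* value is stored LE */

	((resume_fn)(uintptr_t)resume_addr)();		/* mov pc, r0 */
}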
- */ -#define OMAP1510_GPIO_BASE 0xFFFCE000 - -/* GPIO register bitmasks */ -#define KEYBRD_DATA_MASK (0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_DATA) -#define KEYBRD_CLK_MASK (0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_CLK) -#define MODEM_IRQ_MASK (0x1 << AMS_DELTA_GPIO_PIN_MODEM_IRQ) -#define HOOK_SWITCH_MASK (0x1 << AMS_DELTA_GPIO_PIN_HOOK_SWITCH) -#define OTHERS_MASK (MODEM_IRQ_MASK | HOOK_SWITCH_MASK) - -/* IRQ handler register bitmasks */ -#define DEFERRED_FIQ_MASK OMAP_IRQ_BIT(INT_DEFERRED_FIQ) -#define GPIO_BANK1_MASK OMAP_IRQ_BIT(INT_GPIO_BANK1) - -/* Driver buffer byte offsets */ -#define BUF_MASK (FIQ_MASK * 4) -#define BUF_STATE (FIQ_STATE * 4) -#define BUF_KEYS_CNT (FIQ_KEYS_CNT * 4) -#define BUF_TAIL_OFFSET (FIQ_TAIL_OFFSET * 4) -#define BUF_HEAD_OFFSET (FIQ_HEAD_OFFSET * 4) -#define BUF_BUF_LEN (FIQ_BUF_LEN * 4) -#define BUF_KEY (FIQ_KEY * 4) -#define BUF_MISSED_KEYS (FIQ_MISSED_KEYS * 4) -#define BUF_BUFFER_START (FIQ_BUFFER_START * 4) -#define BUF_GPIO_INT_MASK (FIQ_GPIO_INT_MASK * 4) -#define BUF_KEYS_HICNT (FIQ_KEYS_HICNT * 4) -#define BUF_IRQ_PEND (FIQ_IRQ_PEND * 4) -#define BUF_SIR_CODE_L1 (FIQ_SIR_CODE_L1 * 4) -#define BUF_SIR_CODE_L2 (IRQ_SIR_CODE_L2 * 4) -#define BUF_CNT_INT_00 (FIQ_CNT_INT_00 * 4) -#define BUF_CNT_INT_KEY (FIQ_CNT_INT_KEY * 4) -#define BUF_CNT_INT_MDM (FIQ_CNT_INT_MDM * 4) -#define BUF_CNT_INT_03 (FIQ_CNT_INT_03 * 4) -#define BUF_CNT_INT_HSW (FIQ_CNT_INT_HSW * 4) -#define BUF_CNT_INT_05 (FIQ_CNT_INT_05 * 4) -#define BUF_CNT_INT_06 (FIQ_CNT_INT_06 * 4) -#define BUF_CNT_INT_07 (FIQ_CNT_INT_07 * 4) -#define BUF_CNT_INT_08 (FIQ_CNT_INT_08 * 4) -#define BUF_CNT_INT_09 (FIQ_CNT_INT_09 * 4) -#define BUF_CNT_INT_10 (FIQ_CNT_INT_10 * 4) -#define BUF_CNT_INT_11 (FIQ_CNT_INT_11 * 4) -#define BUF_CNT_INT_12 (FIQ_CNT_INT_12 * 4) -#define BUF_CNT_INT_13 (FIQ_CNT_INT_13 * 4) -#define BUF_CNT_INT_14 (FIQ_CNT_INT_14 * 4) -#define BUF_CNT_INT_15 (FIQ_CNT_INT_15 * 4) -#define BUF_CIRC_BUFF (FIQ_CIRC_BUFF * 4) - - -/* - * Register usage - * r8 - temporary - * r9 - the driver buffer - * r10 - temporary - * r11 - interrupts mask - * r12 - base pointers - * r13 - interrupts status - */ - - .text - - .global qwerty_fiqin_end - -ENTRY(qwerty_fiqin_start) - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - @ FIQ intrrupt handler - ldr r12, omap_ih1_base @ set pointer to level1 handler - - ldr r11, [r12, #IRQ_MIR_REG_OFFSET] @ fetch interrupts mask - - ldr r13, [r12, #IRQ_ITR_REG_OFFSET] @ fetch interrupts status - bics r13, r13, r11 @ clear masked - any left? - beq exit @ none - spurious FIQ? exit - - ldr r10, [r12, #IRQ_SIR_FIQ_REG_OFFSET] @ get requested interrupt number - - mov r8, #2 @ reset FIQ agreement - str r8, [r12, #IRQ_CONTROL_REG_OFFSET] - - cmp r10, #(INT_GPIO_BANK1 - NR_IRQS_LEGACY) @ is it GPIO interrupt? - beq gpio @ yes - process it - - mov r8, #1 - orr r8, r11, r8, lsl r10 @ mask spurious interrupt - str r8, [r12, #IRQ_MIR_REG_OFFSET] -exit: - subs pc, lr, #4 @ return from FIQ - @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - - - @@@@@@@@@@@@@@@@@@@@@@@@@@@ -gpio: @ GPIO bank interrupt handler - ldr r12, omap1510_gpio_base @ set base pointer to GPIO bank - - ldr r11, [r12, #OMAP1510_GPIO_INT_MASK] @ fetch GPIO interrupts mask -restart: - ldr r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ fetch status bits - bics r13, r13, r11 @ clear masked - any left? - beq exit @ no - spurious interrupt? 
exit - - orr r11, r11, r13 @ mask all requested interrupts - str r11, [r12, #OMAP1510_GPIO_INT_MASK] - - str r13, [r12, #OMAP1510_GPIO_INT_STATUS] @ ack all requested interrupts - - ands r10, r13, #KEYBRD_CLK_MASK @ extract keyboard status - set? - beq hksw @ no - try next source - - - @@@@@@@@@@@@@@@@@@@@@@ - @ Keyboard clock FIQ mode interrupt handler - @ r10 now contains KEYBRD_CLK_MASK, use it - bic r11, r11, r10 @ unmask it - str r11, [r12, #OMAP1510_GPIO_INT_MASK] - - @ Process keyboard data - ldr r8, [r12, #OMAP1510_GPIO_DATA_INPUT] @ fetch GPIO input - - ldr r10, [r9, #BUF_STATE] @ fetch kbd interface state - cmp r10, #0 @ are we expecting start bit? - bne data @ no - go to data processing - - ands r8, r8, #KEYBRD_DATA_MASK @ check start bit - detected? - beq hksw @ no - try next source - - @ r8 contains KEYBRD_DATA_MASK, use it - str r8, [r9, #BUF_STATE] @ enter data processing state - @ r10 already contains 0, reuse it - str r10, [r9, #BUF_KEY] @ clear keycode - mov r10, #2 @ reset input bit mask - str r10, [r9, #BUF_MASK] - - @ Mask other GPIO line interrupts till key done - str r11, [r9, #BUF_GPIO_INT_MASK] @ save mask for later restore - mvn r11, #KEYBRD_CLK_MASK @ prepare all except kbd mask - str r11, [r12, #OMAP1510_GPIO_INT_MASK] @ store into the mask register - - b restart @ restart - -data: ldr r10, [r9, #BUF_MASK] @ fetch current input bit mask - - @ r8 still contains GPIO input bits - ands r8, r8, #KEYBRD_DATA_MASK @ is keyboard data line low? - ldreq r8, [r9, #BUF_KEY] @ yes - fetch collected so far, - orreq r8, r8, r10 @ set 1 at current mask position - streq r8, [r9, #BUF_KEY] @ and save back - - mov r10, r10, lsl #1 @ shift mask left - bics r10, r10, #0x800 @ have we got all the bits? - strne r10, [r9, #BUF_MASK] @ not yet - store the mask - bne restart @ and restart - - @ r10 already contains 0, reuse it - str r10, [r9, #BUF_STATE] @ reset state to start - - @ Key done - restore interrupt mask - ldr r10, [r9, #BUF_GPIO_INT_MASK] @ fetch saved mask - and r11, r11, r10 @ unmask all saved as unmasked - str r11, [r12, #OMAP1510_GPIO_INT_MASK] @ restore into the mask register - - @ Try appending the keycode to the circular buffer - ldr r10, [r9, #BUF_KEYS_CNT] @ get saved keystrokes count - ldr r8, [r9, #BUF_BUF_LEN] @ get buffer size - cmp r10, r8 @ is buffer full? - beq hksw @ yes - key lost, next source - - add r10, r10, #1 @ incremet keystrokes counter - str r10, [r9, #BUF_KEYS_CNT] - - ldr r10, [r9, #BUF_TAIL_OFFSET] @ get buffer tail offset - @ r8 already contains buffer size - cmp r10, r8 @ end of buffer? - moveq r10, #0 @ yes - rewind to buffer start - - ldr r12, [r9, #BUF_BUFFER_START] @ get buffer start address - add r12, r12, r10, LSL #2 @ calculate buffer tail address - ldr r8, [r9, #BUF_KEY] @ get last keycode - str r8, [r12] @ append it to the buffer tail - - add r10, r10, #1 @ increment buffer tail offset - str r10, [r9, #BUF_TAIL_OFFSET] - - ldr r10, [r9, #BUF_CNT_INT_KEY] @ increment interrupts counter - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_KEY] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -hksw: @Is hook switch interrupt requested? - tst r13, #HOOK_SWITCH_MASK @ is hook switch status bit set? - beq mdm @ no - try next source - - - @@@@@@@@@@@@@@@@@@@@@@@@ - @ Hook switch interrupt FIQ mode simple handler - - @ Don't toggle active edge, the switch always bounces - - @ Increment hook switch interrupt counter - ldr r10, [r9, #BUF_CNT_INT_HSW] - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_HSW] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -mdm: @Is it a modem interrupt? 
- tst r13, #MODEM_IRQ_MASK @ is modem status bit set? - beq irq @ no - check for next interrupt - - - @@@@@@@@@@@@@@@@@@@@@@@@ - @ Modem FIQ mode interrupt handler stub - - @ Increment modem interrupt counter - ldr r10, [r9, #BUF_CNT_INT_MDM] - add r10, r10, #1 - str r10, [r9, #BUF_CNT_INT_MDM] - @@@@@@@@@@@@@@@@@@@@@@@@ - - -irq: @ Place deferred_fiq interrupt request - ldr r12, deferred_fiq_ih_base @ set pointer to IRQ handler - mov r10, #DEFERRED_FIQ_MASK @ set deferred_fiq bit - str r10, [r12, #IRQ_ISR_REG_OFFSET] @ place it in the ISR register - - ldr r12, omap1510_gpio_base @ set pointer back to GPIO bank - b restart @ check for next GPIO interrupt - @@@@@@@@@@@@@@@@@@@@@@@@@@@ - - -/* - * Virtual addresses for IO - */ -omap_ih1_base: - .word OMAP1_IO_ADDRESS(OMAP_IH1_BASE) -deferred_fiq_ih_base: - .word OMAP1_IO_ADDRESS(DEFERRED_FIQ_IH_BASE) -omap1510_gpio_base: - .word OMAP1_IO_ADDRESS(OMAP1510_GPIO_BASE) -qwerty_fiqin_end: - -/* - * Check the size of the FIQ, - * it cannot go beyond 0xffff0200, and is copied to 0xffff001c - */ -.if (qwerty_fiqin_end - qwerty_fiqin_start) > (0x200 - 0x1c) - .err -.endif diff --git a/arch/arm/mach-omap1/sleep.S b/arch/arm/mach-omap1/sleep.S deleted file mode 100644 index a908c51839a43bc7b56c2db0abdc27fbc16f89fa..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/sleep.S +++ /dev/null @@ -1,370 +0,0 @@ -/* - * linux/arch/arm/mach-omap1/sleep.S - * - * Low-level OMAP7XX/1510/1610 sleep/wakeUp support - * - * Initial SA1110 code: - * Copyright (c) 2001 Cliff Brake - * - * Adapted for PXA by Nicolas Pitre: - * Copyright (c) 2002 Monta Vista Software, Inc. - * - * Support for OMAP1510/1610 by Dirk Behme - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -#include - -#include - -#include "iomap.h" -#include "pm.h" - - .text - - -/* - * Forces OMAP into deep sleep state - * - * omapXXXX_cpu_suspend() - * - * The values of the registers ARM_IDLECT1 and ARM_IDLECT2 are passed - * as arg0 and arg1 from caller. arg0 is stored in register r0 and arg1 - * in register r1. - * - * Note: This code get's copied to internal SRAM at boot. When the OMAP - * wakes up it continues execution at the point it went to sleep. - * - * Note: Because of errata work arounds we have processor specific functions - * here. They are mostly the same, but slightly different. 
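The keyboard FIQ path in ams-delta-fiq-handler.S above is a bit-banged serial decoder: the handler runs once per keyboard clock FIQ, a high start bit arms it, each subsequent clock samples the data line into the keycode at the current mask position until the mask reaches bit 11, and the finished keycode is appended to the circular buffer; the 'irq:' stub above then posts DEFERRED_FIQ_MASK so the ordinary IRQ path can drain it. A state-machine sketch of the decode, assuming the same data-low-means-one polarity as the asm:

#include <stdint.h>

struct kbd_state {
	uint32_t busy;	/* 0: waiting for a start bit */
	uint32_t key;	/* keycode being assembled    */
	uint32_t mask;	/* current input bit position */
};

/* Called on each keyboard clock interrupt; returns 1 when a full
 * keycode is ready in s->key. */
static int kbd_clock_edge(struct kbd_state *s, int data_line_high)
{
	if (!s->busy) {
		if (!data_line_high)
			return 0;	/* no start bit yet        */
		s->busy = 1;		/* enter data state        */
		s->key = 0;
		s->mask = 2;		/* reset input bit mask    */
		return 0;
	}

	if (!data_line_high)		/* low data line is a 1    */
		s->key |= s->mask;

	s->mask <<= 1;
	if (!(s->mask & 0x800))
		return 0;		/* more bits to collect    */

	s->busy = 0;			/* word done, back to idle */
	return 1;
}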
- * - */ - -#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850) - .align 3 -ENTRY(omap7xx_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ Drain write cache - mov r4, #0 - mcr p15, 0, r0, c7, c10, 4 - nop - - @ load base address of Traffic Controller - mov r6, #TCMIF_ASM_BASE & 0xff000000 - orr r6, r6, #TCMIF_ASM_BASE & 0x00ff0000 - orr r6, r6, #TCMIF_ASM_BASE & 0x0000ff00 - - @ prepare to put SDRAM into self-refresh manually - ldr r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r9, r7, #SELF_REFRESH_MODE & 0xff000000 - orr r9, r9, #SELF_REFRESH_MODE & 0x000000ff - str r9, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put EMIFS to Sleep - ldr r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r9, r8, #IDLE_EMIFS_REQUEST & 0xff - str r9, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ turn off clock domains - @ do not disable PERCK (0x04) - mov r5, #OMAP7XX_IDLECT2_SLEEP_VAL & 0xff - orr r5, r5, #OMAP7XX_IDLECT2_SLEEP_VAL & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ request ARM idle - mov r3, #OMAP7XX_IDLECT1_SLEEP_VAL & 0xff - orr r3, r3, #OMAP7XX_IDLECT1_SLEEP_VAL & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ disable instruction cache - mrc p15, 0, r9, c1, c0, 0 - bic r2, r9, #0x1000 - mcr p15, 0, r2, c1, c0, 0 - nop - -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt -/* - * omap7xx_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack. - */ - @ re-enable Icache - mcr p15, 0, r9, c1, c0, 0 - - @ reset the ARM_IDLECT1 and ARM_IDLECT2. - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ Restore EMIFF controls - str r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - str r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap7xx_cpu_suspend_sz) - .word . 
- omap7xx_cpu_suspend -#endif /* CONFIG_ARCH_OMAP730 || CONFIG_ARCH_OMAP850 */ - -#ifdef CONFIG_ARCH_OMAP15XX - .align 3 -ENTRY(omap1510_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ load base address of Traffic Controller - mov r4, #TCMIF_ASM_BASE & 0xff000000 - orr r4, r4, #TCMIF_ASM_BASE & 0x00ff0000 - orr r4, r4, #TCMIF_ASM_BASE & 0x0000ff00 - - @ work around errata of OMAP1510 PDE bit for TC shut down - @ clear PDE bit - ldr r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - bic r5, r5, #PDE_BIT & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ set PWD_EN bit - and r5, r5, #PWD_EN_BIT & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put SDRAM into self-refresh manually - ldr r5, [r4, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r5, r5, #SELF_REFRESH_MODE & 0xff000000 - orr r5, r5, #SELF_REFRESH_MODE & 0x000000ff - str r5, [r4, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ prepare to put EMIFS to Sleep - ldr r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r5, r5, #IDLE_EMIFS_REQUEST & 0xff - str r5, [r4, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ turn off clock domains - mov r5, #OMAP1510_IDLE_CLOCK_DOMAINS & 0xff - orr r5, r5, #OMAP1510_IDLE_CLOCK_DOMAINS & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ request ARM idle - mov r3, #OMAP1510_DEEP_SLEEP_REQUEST & 0xff - orr r3, r3, #OMAP1510_DEEP_SLEEP_REQUEST & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - mov r5, #IDLE_WAIT_CYCLES & 0xff - orr r5, r5, #IDLE_WAIT_CYCLES & 0xff00 -l_1510_2: - subs r5, r5, #1 - bne l_1510_2 -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt -/* - * omap1510_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack, reset the ARM_IDLECT1 and ARM_IDLECT2. - */ - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap1510_cpu_suspend_sz) - .word . 
- omap1510_cpu_suspend -#endif /* CONFIG_ARCH_OMAP15XX */ - -#if defined(CONFIG_ARCH_OMAP16XX) - .align 3 -ENTRY(omap1610_cpu_suspend) - - @ save registers on stack - stmfd sp!, {r0 - r12, lr} - - @ Drain write cache - mov r4, #0 - mcr p15, 0, r0, c7, c10, 4 - nop - - @ Load base address of Traffic Controller - mov r6, #TCMIF_ASM_BASE & 0xff000000 - orr r6, r6, #TCMIF_ASM_BASE & 0x00ff0000 - orr r6, r6, #TCMIF_ASM_BASE & 0x0000ff00 - - @ Prepare to put SDRAM into self-refresh manually - ldr r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - orr r9, r7, #SELF_REFRESH_MODE & 0xff000000 - orr r9, r9, #SELF_REFRESH_MODE & 0x000000ff - str r9, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - - @ Prepare to put EMIFS to Sleep - ldr r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - orr r9, r8, #IDLE_EMIFS_REQUEST & 0xff - str r9, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ Load base address of ARM_IDLECT1 and ARM_IDLECT2 - mov r4, #CLKGEN_REG_ASM_BASE & 0xff000000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x00ff0000 - orr r4, r4, #CLKGEN_REG_ASM_BASE & 0x0000ff00 - - @ Turn off clock domains - @ Do not disable PERCK (0x04) - mov r5, #OMAP1610_IDLECT2_SLEEP_VAL & 0xff - orr r5, r5, #OMAP1610_IDLECT2_SLEEP_VAL & 0xff00 - strh r5, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - - @ Request ARM idle - mov r3, #OMAP1610_IDLECT1_SLEEP_VAL & 0xff - orr r3, r3, #OMAP1610_IDLECT1_SLEEP_VAL & 0xff00 - strh r3, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - -/* - * Let's wait for the next wake up event to wake us up. r0 can't be - * used here because r0 holds ARM_IDLECT1 - */ - mov r2, #0 - mcr p15, 0, r2, c7, c0, 4 @ wait for interrupt - - @ Errata (HEL3SU467, section 1.4.4) specifies nop-instructions - @ according to this formula: - @ 2 + (4*DPLL_MULT)/DPLL_DIV/ARMDIV - @ Max DPLL_MULT = 18 - @ DPLL_DIV = 1 - @ ARMDIV = 1 - @ => 74 nop-instructions - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @10 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @20 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @30 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @40 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @50 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @60 - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop @70 - nop - nop - nop - nop @74 -/* - * omap1610_cpu_suspend()'s resume point. - * - * It will just start executing here, so we'll restore stuff from the - * stack. - */ - @ Restore the ARM_IDLECT1 and ARM_IDLECT2. - strh r1, [r4, #ARM_IDLECT2_ASM_OFFSET & 0xff] - strh r0, [r4, #ARM_IDLECT1_ASM_OFFSET & 0xff] - - @ Restore EMIFF controls - str r7, [r6, #EMIFF_SDRAM_CONFIG_ASM_OFFSET & 0xff] - str r8, [r6, #EMIFS_CONFIG_ASM_OFFSET & 0xff] - - @ Restore regs and return - ldmfd sp!, {r0 - r12, pc} - -ENTRY(omap1610_cpu_suspend_sz) - .word . - omap1610_cpu_suspend -#endif /* CONFIG_ARCH_OMAP16XX */ diff --git a/arch/arm/mach-omap1/sram.S b/arch/arm/mach-omap1/sram.S deleted file mode 100644 index 37f34fcd65fb9dee5060f9949f08a7e3ff6a653f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap1/sram.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/plat-omap/sram-fn.S - * - * Functions that need to be run in internal SRAM - */ - -#include - -#include - -#include - -#include "iomap.h" - - .text - -/* - * Reprograms ULPD and CKCTL. 
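The three omapXXXX_cpu_suspend variants above share one shape; only errata handling differs (the 1510 PDE/TC workaround, the 1610 post-WFI nop slide per errata HEL3SU467). The mov/orr chains are how 32-bit register addresses get built, since ARM data-processing immediates are limited to rotated 8-bit values. A hedged C outline of the common sequence (I-cache handling and the errata steps omitted, accessor types hypothetical):

#include <stdint.h>

struct omap1_pm_regs {
	volatile uint32_t *emiff_sdram_config;	/* SDRAM interface    */
	volatile uint32_t *emifs_config;	/* flash interface    */
	volatile uint16_t *idlect1;		/* ARM idle requests  */
	volatile uint16_t *idlect2;		/* clock domain gates */
};

static void omap1_deep_sleep(struct omap1_pm_regs *r,
			     uint32_t self_refresh, uint32_t emifs_idle,
			     uint16_t idlect1_sleep, uint16_t idlect2_sleep,
			     uint16_t idlect1_saved, uint16_t idlect2_saved)
{
	uint32_t emiff = *r->emiff_sdram_config;	/* keep pre-sleep    */
	uint32_t emifs = *r->emifs_config;		/* values (r7/r8)    */

	*r->emiff_sdram_config = emiff | self_refresh;	/* SDRAM into SR     */
	*r->emifs_config = emifs | emifs_idle;		/* EMIFS to sleep    */
	*r->idlect2 = idlect2_sleep;			/* gate clocks       */
	*r->idlect1 = idlect1_sleep;			/* request deep idle */

	/* wfi; after wakeup, execution continues here */

	*r->idlect2 = idlect2_saved;			/* undo, in reverse  */
	*r->idlect1 = idlect1_saved;
	*r->emiff_sdram_config = emiff;			/* exit self-refresh */
	*r->emifs_config = emifs;
}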
- */ - .align 3 -ENTRY(omap1_sram_reprogram_clock) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0xff000000 - orr r2, r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0x00ff0000 - orr r2, r2, #OMAP1_IO_ADDRESS(DPLL_CTL) & 0x0000ff00 - - mov r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0xff000000 - orr r3, r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0x00ff0000 - orr r3, r3, #OMAP1_IO_ADDRESS(ARM_CKCTL) & 0x0000ff00 - - tst r0, #1 << 4 @ want lock mode? - beq newck @ nope - bic r0, r0, #1 << 4 @ else clear lock bit - strh r0, [r2] @ set dpll into bypass mode - orr r0, r0, #1 << 4 @ set lock bit again - -newck: - strh r1, [r3] @ write new ckctl value - strh r0, [r2] @ write new dpll value - - mov r4, #0x0700 @ let the clocks settle - orr r4, r4, #0x00ff -delay: sub r4, r4, #1 - cmp r4, #0 - bne delay - -lock: ldrh r4, [r2], #0 @ read back dpll value - tst r0, #1 << 4 @ want lock mode? - beq out @ nope - tst r4, #1 << 0 @ dpll rate locked? - beq lock @ try again - -out: - ldmfd sp!, {r0 - r12, pc} @ restore regs and return -ENTRY(omap1_sram_reprogram_clock_sz) - .word . - omap1_sram_reprogram_clock diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S deleted file mode 100644 index 1762f919941f4f98d3159e83d8d39f882f23477c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/omap-headsmp.S +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Secondary CPU startup routine source file. - * - * Copyright (C) 2009-2014 Texas Instruments, Inc. - * - * Author: - * Santosh Shilimkar - * - * Interface functions needed for the SMP. This file is based on arm - * realview smp platform. - * Copyright (c) 2003 ARM Limited. - */ - -#include -#include -#include - -#include "omap44xx.h" - -/* Physical address needed since MMU not enabled yet on secondary core */ -#define AUX_CORE_BOOT0_PA 0x48281800 -#define API_HYP_ENTRY 0x102 - -ENTRY(omap_secondary_startup) -#ifdef CONFIG_SMP - b secondary_startup -#else -/* Should never get here */ -again: wfi - b again -#endif -#ENDPROC(omap_secondary_startup) - -/* - * OMAP5 specific entry point for secondary CPU to jump from ROM - * code. This routine also provides a holding flag into which - * secondary core is held until we're ready for it to initialise. - * The primary core will update this flag using a hardware - * register AuxCoreBoot0. - */ -ENTRY(omap5_secondary_startup) -wait: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 - ldr r0, [r2] - mov r0, r0, lsr #5 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne wait - b omap_secondary_startup -ENDPROC(omap5_secondary_startup) -/* - * Same as omap5_secondary_startup except we call into the ROM to - * enable HYP mode first. This is called instead of - * omap5_secondary_startup if the primary CPU was put into HYP mode by - * the boot loader. - */ - .arch armv7-a - .arch_extension sec -ENTRY(omap5_secondary_hyp_startup) -wait_2: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 - ldr r0, [r2] - mov r0, r0, lsr #5 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne wait_2 - ldr r12, =API_HYP_ENTRY - badr r0, hyp_boot - smc #0 -hyp_boot: - b omap_secondary_startup -ENDPROC(omap5_secondary_hyp_startup) -/* - * OMAP4 specific entry point for secondary CPU to jump from ROM - * code. This routine also provides a holding flag into which - * secondary core is held until we're ready for it to initialise. - * The primary core will update this flag using a hardware - * register AuxCoreBoot0. 
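omap1_sram_reprogram_clock above runs from SRAM because it retunes the clocks feeding the memory the kernel normally executes from. Its control flow as a C sketch (the register addresses are built with the mov/orr idiom in the real code and are simply passed in here):

#include <stdint.h>

static void reprogram_clock(volatile uint16_t *dpll_ctl,
			    volatile uint16_t *arm_ckctl,
			    uint16_t dpll, uint16_t ckctl)
{
	int want_lock = dpll & (1 << 4);

	if (want_lock)
		*dpll_ctl = dpll & ~(1 << 4);	/* drop DPLL into bypass  */

	*arm_ckctl = ckctl;			/* new CKCTL value        */
	*dpll_ctl = dpll;			/* new DPLL value         */

	for (volatile int i = 0x7ff; i; i--)	/* let the clocks settle  */
		;

	if (want_lock)
		while (!(*dpll_ctl & 1))	/* spin until rate locked */
			;
}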
- */ -ENTRY(omap4_secondary_startup) -hold: ldr r12,=0x103 - dsb - smc #0 @ read from AuxCoreBoot0 - mov r0, r0, lsr #9 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne hold - - /* - * we've been released from the wait loop,secondary_stack - * should now contain the SVC stack for this core - */ - b omap_secondary_startup -ENDPROC(omap4_secondary_startup) - -ENTRY(omap4460_secondary_startup) -hold_2: ldr r12,=0x103 - dsb - smc #0 @ read from AuxCoreBoot0 - mov r0, r0, lsr #9 - mrc p15, 0, r4, c0, c0, 5 - and r4, r4, #0x0f - cmp r0, r4 - bne hold_2 - - /* - * GIC distributor control register has changed between - * CortexA9 r1pX and r2pX. The Control Register secure - * banked version is now composed of 2 bits: - * bit 0 == Secure Enable - * bit 1 == Non-Secure Enable - * The Non-Secure banked register has not changed - * Because the ROM Code is based on the r1pX GIC, the CPU1 - * GIC restoration will cause a problem to CPU0 Non-Secure SW. - * The workaround must be: - * 1) Before doing the CPU1 wakeup, CPU0 must disable - * the GIC distributor - * 2) CPU1 must re-enable the GIC distributor on - * it's wakeup path. - */ - ldr r1, =OMAP44XX_GIC_DIST_BASE - ldr r0, [r1] - orr r0, #1 - str r0, [r1] - - /* - * we've been released from the wait loop,secondary_stack - * should now contain the SVC stack for this core - */ - b omap_secondary_startup -ENDPROC(omap4460_secondary_startup) diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S deleted file mode 100644 index fd2bcd91f4a14c874bd8b833bd91a7e5ef0d8c8c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/omap-smc.S +++ /dev/null @@ -1,96 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * OMAP34xx and OMAP44xx secure APIs file. - * - * Copyright (C) 2010 Texas Instruments, Inc. - * Written by Santosh Shilimkar - * - * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár - */ - -#include - -/* - * This is common routine to manage secure monitor API - * used to modify the PL310 secure registers. - * 'r0' contains the value to be modified and 'r12' contains - * the monitor API number. It uses few CPU registers - * internally and hence they need be backed up including - * link register "lr". - * Function signature : void omap_smc1(u32 fn, u32 arg) - */ - .arch armv7-a - .arch_extension sec -ENTRY(omap_smc1) - stmfd sp!, {r2-r12, lr} - mov r12, r0 - mov r0, r1 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_smc1) - -/** - * u32 omap_smc2(u32 id, u32 falg, u32 pargs) - * Low level common routine for secure HAL and PPA APIs. 
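The 'hold:'/'hold_2:' loops above (and the 'wait:' loops in the OMAP5 variants earlier) implement the same release protocol: the boot CPU publishes the chosen CPU number through AuxCoreBoot0, and each secondary spins until the published id matches its own MPIDR[3:0]. OMAP4 reads the register through the 0x103 secure-monitor call and shifts by 9; OMAP5 reads it directly and shifts by 5. A sketch with the read abstracted out:

#include <stdint.h>

static void secondary_hold(uint32_t (*read_auxcoreboot0)(void),
			   unsigned int shift, uint32_t my_cpu_id)
{
	while ((read_auxcoreboot0() >> shift) != my_cpu_id)
		;	/* keep spinning until released */

	/* fall through to the common secondary startup path */
}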
- * @id: Application ID of HAL APIs - * @flag: Flag to indicate the criticality of operation - * @pargs: Physical address of parameter list starting - * with number of parametrs - */ -ENTRY(omap_smc2) - stmfd sp!, {r4-r12, lr} - mov r3, r2 - mov r2, r1 - mov r1, #0x0 @ Process ID - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - mov r7, #0 - mcr p15, 0, r7, c7, c5, 6 - dsb - dmb - smc #0 - ldmfd sp!, {r4-r12, pc} -ENDPROC(omap_smc2) - -/** - * u32 omap_smc3(u32 service_id, u32 process_id, u32 flag, u32 pargs) - * Low level common routine for secure HAL and PPA APIs via smc #1 - * r0 - @service_id: Secure Service ID - * r1 - @process_id: Process ID - * r2 - @flag: Flag to indicate the criticality of operation - * r3 - @pargs: Physical address of parameter list - */ -ENTRY(omap_smc3) - stmfd sp!, {r4-r11, lr} - mov r12, r0 @ Copy the secure service ID - mov r6, #0xff @ Indicate new Task call - dsb @ Memory Barrier (not sure if needed, copied from omap_smc2) - smc #1 @ Call PPA service - ldmfd sp!, {r4-r11, pc} -ENDPROC(omap_smc3) - -ENTRY(omap_modify_auxcoreboot0) - stmfd sp!, {r1-r12, lr} - ldr r12, =0x104 - dsb - smc #0 - ldmfd sp!, {r1-r12, pc} -ENDPROC(omap_modify_auxcoreboot0) - -ENTRY(omap_auxcoreboot_addr) - stmfd sp!, {r2-r12, lr} - ldr r12, =0x105 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_auxcoreboot_addr) - -ENTRY(omap_read_auxcoreboot0) - stmfd sp!, {r2-r12, lr} - ldr r12, =0x103 - dsb - smc #0 - ldmfd sp!, {r2-r12, pc} -ENDPROC(omap_read_auxcoreboot0) diff --git a/arch/arm/mach-omap2/sleep24xx.S b/arch/arm/mach-omap2/sleep24xx.S deleted file mode 100644 index 84d8c43ad382c2a8323efc260dd0f61e0adb20e5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep24xx.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sleep.S - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * (C) Copyright 2006 Nokia Corporation - * Fixed idle loop sleep - * Igor Stoppa - */ - -#include -#include - -#include "omap24xx.h" -#include "sdrc.h" - -/* First address of reserved address space? apparently valid for OMAP2 & 3 */ -#define A_SDRC0_V (0xC0000000) - - .text - -/* - * omap24xx_cpu_suspend() - Forces OMAP into deep sleep state by completing - * SDRC shutdown then ARM shutdown. Upon wake MPU is back on so just restore - * SDRC. - * - * Input: - * R0 : DLL ctrl value pre-Sleep - * R1 : SDRC_DLLA_CTRL - * R2 : SDRC_POWER - * - * The if the DPLL is going to AutoIdle. It seems like the DPLL may be back on - * when we get called, but the DLL probably isn't. We will wait a bit more in - * case the DPLL isn't quite there yet. The code will wait on DLL for DDR even - * if in unlocked mode. - * - * For less than 242x-ES2.2 upon wake from a sleep mode where the external - * oscillator was stopped, a timing bug exists where a non-stabilized 12MHz - * clock can pass into the PRCM can cause problems at DSP and IVA. - * To work around this the code will switch to the 32kHz source prior to sleep. - * Post sleep we will shift back to using the DPLL. Apparently, - * CM_IDLEST_CLKGEN does not reflect the full clock change so you need to wait - * 3x12MHz + 3x32kHz clocks for a full switch. - * - * The DLL load value is not kept in RETENTION or OFF. 
It needs to be restored - * at wake - */ - .align 3 -ENTRY(omap24xx_cpu_suspend) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - mov r3, #0x0 @ clear for mcr call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, hope SDR/DDR finished - nop - nop - ldr r4, [r2] @ read SDRC_POWER - orr r4, r4, #0x40 @ enable self refresh on idle req - mov r5, #0x2000 @ set delay (DPLL relock + DLL relock) - str r4, [r2] @ make it so - nop - mcr p15, 0, r3, c7, c0, 4 @ wait for interrupt - nop -loop: - subs r5, r5, #0x1 @ awake, wait just a bit - bne loop - - /* The DPLL has to be on before we take the DDR out of self refresh */ - bic r4, r4, #0x40 @ now clear self refresh bit. - str r4, [r2] @ write to SDRC_POWER - ldr r4, A_SDRC0 @ make a clock happen - ldr r4, [r4] @ read A_SDRC0 - nop @ start auto refresh only after clk ok - movs r0, r0 @ see if DDR or SDR - strne r0, [r1] @ rewrite DLLA to force DLL reload - addne r1, r1, #0x8 @ move to DLLB - strne r0, [r1] @ rewrite DLLB to force DLL reload - - mov r5, #0x1000 -loop2: - subs r5, r5, #0x1 - bne loop2 - /* resume*/ - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - -A_SDRC0: - .word A_SDRC0_V - -ENTRY(omap24xx_cpu_suspend_sz) - .word . - omap24xx_cpu_suspend diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S deleted file mode 100644 index dc221249bc22c88ae95b94ce4461781cc9c1c999..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep33xx.S +++ /dev/null @@ -1,262 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Low level suspend code for AM33XX SoCs - * - * Copyright (C) 2012-2018 Texas Instruments Incorporated - http://www.ti.com/ - * Dave Gerlach, Vaibhav Bedia - */ - -#include -#include -#include -#include -#include - -#include "iomap.h" -#include "cm33xx.h" -#include "pm-asm-offsets.h" - -#define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED 0x00030000 -#define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE 0x0003 -#define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE 0x0002 - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - - .arm - .arch armv7-a - .align 3 - -ENTRY(am33xx_do_wfi) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* Save wfi_flags arg to data space */ - mov r4, r0 - adr r3, am33xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - str r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - - /* Only flush cache is we know we are losing MPU context */ - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_flush - - /* - * Flush all data from the L1 and L2 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - blx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - /* - * Invalidate L1 and L2 data cache. 
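The SCTLR.C dance above (flush, clear the bit so no new cache lines are allocated while context is written out, then flush again) recurs in the 34xx and 43xx suspend paths below. The CP15 access itself, as a privileged ARMv7 inline-asm sketch:

#include <stdint.h>

static inline void set_sctlr_c_bit(int enable)
{
	uint32_t sctlr;

	asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r" (sctlr));
	if (enable)
		sctlr |= 1u << 2;	/* allow D-cache allocation       */
	else
		sctlr &= ~(1u << 2);	/* data accesses strongly ordered */
	asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r" (sctlr) : "memory");
	asm volatile("isb");
}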
- */ - ldr r1, kernel_flush - blx r1 - - adr r3, am33xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - ldr r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -cache_skip_flush: - /* Check if we want self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_enter_sr - - adr r9, am33xx_emif_sram_table - - ldr r3, [r9, #EMIF_PM_ENTER_SR_OFFSET] - blx r3 - -emif_skip_enter_sr: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SAVE_EMIF - beq emif_skip_save - - ldr r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET] - blx r3 - -emif_skip_save: - /* Only can disable EMIF if we have entered self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_disable - - /* Disable EMIF */ - ldr r1, virt_emif_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - - ldr r1, virt_emif_clkctrl -wait_emif_disable: - ldr r2, [r1] - mov r3, #AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED - cmp r2, r3 - bne wait_emif_disable - -emif_skip_disable: - tst r4, #WFI_FLAG_WAKE_M3 - beq wkup_m3_skip - - /* - * For the MPU WFI to be registered as an interrupt - * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set - * to DISABLED - */ - ldr r1, virt_mpu_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - -wkup_m3_skip: - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * have completed. - */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Thirteen - * NOPs as per Cortex-A8 pipeline. - */ - wfi - - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - /* We come here in case of an abort due to a late interrupt */ - - /* Set MPU_CLKCTRL.MODULEMODE back to ENABLE */ - ldr r1, virt_mpu_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] - - /* Re-enable EMIF */ - ldr r1, virt_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable - - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_exit_sr_abt - - adr r9, am33xx_emif_sram_table - ldr r1, [r9, #EMIF_PM_ABORT_SR_OFFSET] - blx r1 - -emif_skip_exit_sr_abt: - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_restore - - /* - * Set SCTLR.C bit to allow data cache allocation - */ - mrc p15, 0, r0, c1, c0, 0 - orr r0, r0, #(1 << 2) @ Enable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - -cache_skip_restore: - /* Let the suspend code know about the abort */ - mov r0, #1 - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(am33xx_do_wfi) - - .align -ENTRY(am33xx_resume_offset) - .word . - am33xx_do_wfi - -ENTRY(am33xx_resume_from_deep_sleep) - /* Re-enable EMIF */ - ldr r0, phys_emif_clkctrl - mov r1, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r1, [r0] -wait_emif_enable1: - ldr r2, [r0] - cmp r1, r2 - bne wait_emif_enable1 - - adr r9, am33xx_emif_sram_table - - ldr r1, [r9, #EMIF_PM_RESTORE_CONTEXT_OFFSET] - blx r1 - - ldr r1, [r9, #EMIF_PM_EXIT_SR_OFFSET] - blx r1 - -resume_to_ddr: - /* We are back. 
Branch to the common CPU resume routine */ - mov r0, #0 - ldr pc, resume_addr -ENDPROC(am33xx_resume_from_deep_sleep) - -/* - * Local variables - */ - .align -kernel_flush: - .word v7_flush_dcache_all -virt_mpu_clkctrl: - .word AM33XX_CM_MPU_MPU_CLKCTRL -virt_emif_clkctrl: - .word AM33XX_CM_PER_EMIF_CLKCTRL -phys_emif_clkctrl: - .word (AM33XX_CM_BASE + AM33XX_CM_PER_MOD + \ - AM33XX_CM_PER_EMIF_CLKCTRL_OFFSET) - -.align 3 -/* DDR related defines */ -am33xx_emif_sram_table: - .space EMIF_PM_FUNCTIONS_SIZE - -ENTRY(am33xx_pm_sram) - .word am33xx_do_wfi - .word am33xx_do_wfi_sz - .word am33xx_resume_offset - .word am33xx_emif_sram_table - .word am33xx_pm_ro_sram_data - -resume_addr: -.word cpu_resume - PAGE_OFFSET + 0x80000000 - -.align 3 -ENTRY(am33xx_pm_ro_sram_data) - .space AMX3_PM_RO_SRAM_DATA_SIZE - -ENTRY(am33xx_do_wfi_sz) - .word . - am33xx_do_wfi diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S deleted file mode 100644 index ac1324c6453b5b121a4466b833787a1a9086d2cd..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep34xx.S +++ /dev/null @@ -1,569 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * (C) Copyright 2007 - * Texas Instruments - * Karthik Dasu - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - */ -#include - -#include - -#include "omap34xx.h" -#include "iomap.h" -#include "cm3xxx.h" -#include "prm3xxx.h" -#include "sdrc.h" -#include "sram.h" -#include "control.h" - -/* - * Registers access definitions - */ -#define SDRC_SCRATCHPAD_SEM_OFFS 0xc -#define SDRC_SCRATCHPAD_SEM_V OMAP343X_SCRATCHPAD_REGADDR\ - (SDRC_SCRATCHPAD_SEM_OFFS) -#define PM_PREPWSTST_CORE_P OMAP3430_PRM_BASE + CORE_MOD +\ - OMAP3430_PM_PREPWSTST -#define PM_PWSTCTRL_MPU_P OMAP3430_PRM_BASE + MPU_MOD + OMAP2_PM_PWSTCTRL -#define CM_IDLEST1_CORE_V OMAP34XX_CM_REGADDR(CORE_MOD, CM_IDLEST1) -#define CM_IDLEST_CKGEN_V OMAP34XX_CM_REGADDR(PLL_MOD, CM_IDLEST) -#define SRAM_BASE_P OMAP3_SRAM_PA -#define CONTROL_STAT OMAP343X_CTRL_BASE + OMAP343X_CONTROL_STATUS -#define CONTROL_MEM_RTA_CTRL (OMAP343X_CTRL_BASE +\ - OMAP36XX_CONTROL_MEM_RTA_CTRL) - -/* Move this as correct place is available */ -#define SCRATCHPAD_MEM_OFFS 0x310 -#define SCRATCHPAD_BASE_P (OMAP343X_CTRL_BASE +\ - OMAP343X_CONTROL_MEM_WKUP +\ - SCRATCHPAD_MEM_OFFS) -#define SDRC_POWER_V OMAP34XX_SDRC_REGADDR(SDRC_POWER) -#define SDRC_SYSCONFIG_P (OMAP343X_SDRC_BASE + SDRC_SYSCONFIG) -#define SDRC_MR_0_P (OMAP343X_SDRC_BASE + SDRC_MR_0) -#define SDRC_EMR2_0_P (OMAP343X_SDRC_BASE + SDRC_EMR2_0) -#define SDRC_MANUAL_0_P (OMAP343X_SDRC_BASE + SDRC_MANUAL_0) -#define SDRC_MR_1_P (OMAP343X_SDRC_BASE + SDRC_MR_1) -#define SDRC_EMR2_1_P (OMAP343X_SDRC_BASE + SDRC_EMR2_1) -#define SDRC_MANUAL_1_P (OMAP343X_SDRC_BASE + SDRC_MANUAL_1) -#define SDRC_DLLA_STATUS_V OMAP34XX_SDRC_REGADDR(SDRC_DLLA_STATUS) -#define SDRC_DLLA_CTRL_V OMAP34XX_SDRC_REGADDR(SDRC_DLLA_CTRL) - -/* - * This file needs be built unconditionally as ARM to interoperate correctly - * with non-Thumb-2-capable firmware. - */ - .arm - -/* - * API functions - */ - - .text -/* - * L2 cache needs to be toggled for stable OFF mode functionality on 3630. - * This function sets up a flag that will allow for this toggling to take - * place on 3630. Hopefully some version in the future may not need this. 
- */ -ENTRY(enable_omap3630_toggle_l2_on_restore) - stmfd sp!, {lr} @ save registers on stack - /* Setup so that we will disable and enable l2 */ - mov r1, #0x1 - adrl r3, l2dis_3630_offset @ may be too distant for plain adr - ldr r2, [r3] @ value for offset - str r1, [r2, r3] @ write to l2dis_3630 - ldmfd sp!, {pc} @ restore regs and return -ENDPROC(enable_omap3630_toggle_l2_on_restore) - -/* - * Function to call rom code to save secure ram context. - * - * r0 = physical address of the parameters - */ - .arch armv7-a - .arch_extension sec -ENTRY(save_secure_ram_context) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - mov r3, r0 @ physical address of parameters - mov r0, #25 @ set service ID for PPA - mov r12, r0 @ copy secure service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - nop - nop - nop - nop - ldmfd sp!, {r4 - r11, pc} -ENDPROC(save_secure_ram_context) - -/* - * ====================== - * == Idle entry point == - * ====================== - */ - -/* - * Forces OMAP into idle state - * - * omap34xx_cpu_suspend() - This bit of code saves the CPU context if needed - * and executes the WFI instruction. Calling WFI effectively changes the - * power domains states to the desired target power states. - * - * - * Notes: - * - only the minimum set of functions gets copied to internal SRAM at boot - * and after wake-up from OFF mode, cf. omap_push_sram_idle. The function - * pointers in SDRAM or SRAM are called depending on the desired low power - * target state. - * - when the OMAP wakes up it continues at different execution points - * depending on the low power mode (non-OFF vs OFF modes), - * cf. 'Resume path for xxx mode' comments. - */ - .align 3 -ENTRY(omap34xx_cpu_suspend) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* - * r0 contains information about saving context: - * 0 - No context lost - * 1 - Only L1 and logic lost - * 2 - Only L2 lost (Even L1 is retained we clean it along with L2) - * 3 - Both L1 and L2 lost and logic lost - */ - - /* - * For OFF mode: save context and jump to WFI in SDRAM (omap3_do_wfi) - * For non-OFF modes: jump to the WFI code in SRAM (omap3_do_wfi_sram) - */ - ldr r4, omap3_do_wfi_sram_addr - ldr r5, [r4] - cmp r0, #0x0 @ If no context save required, - bxeq r5 @ jump to the WFI code in SRAM - - - /* Otherwise fall through to the save context code */ -save_context_wfi: - /* - * jump out to kernel flush routine - * - reuse that code is better - * - it executes in a cached space so is faster than refetch per-block - * - should be faster and will change with kernel - * - 'might' have to copy address, load and jump to it - * Flush all data from the L1 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - mov lr, pc - bx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - /* - * Invalidate L1 data cache. Even though only invalidate is - * necessary exported flush API is used here. Doing clean - * on already clean cache would be almost NOP. 
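The entry decision in omap34xx_cpu_suspend above compresses to a two-way branch on the caller's context-loss code: zero means nothing to save, so jump straight to the WFI copy in SRAM; anything else falls through to the save-context path run from SDRAM. As a sketch (function names hypothetical):

static void omap3_suspend_dispatch(int save_state,
				   void (*do_wfi_sram)(void),
				   void (*save_context_then_wfi)(void))
{
	if (save_state == 0)
		do_wfi_sram();			/* non-OFF modes       */
	else
		save_context_then_wfi();	/* OFF: save, then WFI */
}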
- */ - ldr r1, kernel_flush - blx r1 - b omap3_do_wfi -ENDPROC(omap34xx_cpu_suspend) -omap3_do_wfi_sram_addr: - .word omap3_do_wfi_sram -kernel_flush: - .word v7_flush_dcache_all - -/* =================================== - * == WFI instruction => Enter idle == - * =================================== - */ - -/* - * Do WFI instruction - * Includes the resume path for non-OFF modes - * - * This code gets copied to internal SRAM and is accessible - * from both SDRAM and SRAM: - * - executed from SRAM for non-off modes (omap3_do_wfi_sram), - * - executed from SDRAM for OFF mode (omap3_do_wfi). - */ - .align 3 -ENTRY(omap3_do_wfi) - ldr r4, sdrc_power @ read the SDRC_POWER register - ldr r5, [r4] @ read the contents of SDRC_POWER - orr r5, r5, #0x40 @ enable self refresh on idle req - str r5, [r4] @ write back to SDRC_POWER register - - /* Data memory barrier and Data sync barrier */ - dsb - dmb - -/* - * =================================== - * == WFI instruction => Enter idle == - * =================================== - */ - wfi @ wait for interrupt - -/* - * =================================== - * == Resume path for non-OFF modes == - * =================================== - */ - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - -/* - * This function implements the erratum ID i581 WA: - * SDRC state restore before accessing the SDRAM - * - * Only used at return from non-OFF mode. For OFF - * mode the ROM code configures the SDRC and - * the DPLL before calling the restore code directly - * from DDR. - */ - -/* Make sure SDRC accesses are ok */ -wait_sdrc_ok: - -/* DPLL3 must be locked before accessing the SDRC. Maybe the HW ensures this */ - ldr r4, cm_idlest_ckgen -wait_dpll3_lock: - ldr r5, [r4] - tst r5, #1 - beq wait_dpll3_lock - - ldr r4, cm_idlest1_core -wait_sdrc_ready: - ldr r5, [r4] - tst r5, #0x2 - bne wait_sdrc_ready - /* allow DLL powerdown upon hw idle req */ - ldr r4, sdrc_power - ldr r5, [r4] - bic r5, r5, #0x40 - str r5, [r4] - -is_dll_in_lock_mode: - /* Is dll in lock mode? */ - ldr r4, sdrc_dlla_ctrl - ldr r5, [r4] - tst r5, #0x4 - bne exit_nonoff_modes @ Return if locked - /* wait till dll locks */ -wait_dll_lock_timed: - ldr r4, sdrc_dlla_status - /* Wait 20uS for lock */ - mov r6, #8 -wait_dll_lock: - subs r6, r6, #0x1 - beq kick_dll - ldr r5, [r4] - and r5, r5, #0x4 - cmp r5, #0x4 - bne wait_dll_lock - b exit_nonoff_modes @ Return when locked - - /* disable/reenable DLL if not locked */ -kick_dll: - ldr r4, sdrc_dlla_ctrl - ldr r5, [r4] - mov r6, r5 - bic r6, #(1<<3) @ disable dll - str r6, [r4] - dsb - orr r6, r6, #(1<<3) @ enable dll - str r6, [r4] - dsb - b wait_dll_lock_timed - -exit_nonoff_modes: - /* Re-enable C-bit if needed */ - mrc p15, 0, r0, c1, c0, 0 - tst r0, #(1 << 2) @ Check C bit enabled? - orreq r0, r0, #(1 << 2) @ Enable the C bit if cleared - mcreq p15, 0, r0, c1, c0, 0 - isb - -/* - * =================================== - * == Exit point from non-OFF modes == - * =================================== - */ - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(omap3_do_wfi) -sdrc_power: - .word SDRC_POWER_V -cm_idlest1_core: - .word CM_IDLEST1_CORE_V -cm_idlest_ckgen: - .word CM_IDLEST_CKGEN_V -sdrc_dlla_status: - .word SDRC_DLLA_STATUS_V -sdrc_dlla_ctrl: - .word SDRC_DLLA_CTRL_V -ENTRY(omap3_do_wfi_sz) - .word . - omap3_do_wfi - - -/* - * ============================== - * == Resume path for OFF mode == - * ============================== - */ - -/* - * The restore_* functions are called by the ROM code - * when back from WFI in OFF mode. 
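wait_dll_lock_timed/kick_dll above give the DLL a bounded number of polls to report lock and, failing that, pulse its enable bit and start over. A C sketch of that loop (the dsb barriers between the control writes are omitted; lock status is bit 2 of SDRC_DLLA_STATUS and the enable is bit 3 of SDRC_DLLA_CTRL, as in the asm):

#include <stdint.h>

static void wait_dll_lock(volatile uint32_t *dlla_ctrl,
			  volatile uint32_t *dlla_status)
{
	for (;;) {
		int tries = 8;			/* ~20us of polling */

		while (tries--)
			if (*dlla_status & 0x4)
				return;		/* DLL locked       */

		*dlla_ctrl &= ~(1u << 3);	/* disable DLL      */
		*dlla_ctrl |= (1u << 3);	/* re-enable, retry */
	}
}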
- * Cf. the get_*restore_pointer functions. - * - * restore_es3: applies to 34xx >= ES3.0 - * restore_3630: applies to 36xx - * restore: common code for 3xxx - * - * Note: when back from CORE and MPU OFF mode we are running - * from SDRAM, without MMU, without the caches and prediction. - * Also the SRAM content has been cleared. - */ -ENTRY(omap3_restore_es3) - ldr r5, pm_prepwstst_core_p - ldr r4, [r5] - and r4, r4, #0x3 - cmp r4, #0x0 @ Check if previous power state of CORE is OFF - bne omap3_restore @ Fall through to OMAP3 common code - adr r0, es3_sdrc_fix - ldr r1, sram_base - ldr r2, es3_sdrc_fix_sz - mov r2, r2, ror #2 -copy_to_sram: - ldmia r0!, {r3} @ val = *src - stmia r1!, {r3} @ *dst = val - subs r2, r2, #0x1 @ num_words-- - bne copy_to_sram - ldr r1, sram_base - blx r1 - b omap3_restore @ Fall through to OMAP3 common code -ENDPROC(omap3_restore_es3) - -ENTRY(omap3_restore_3630) - ldr r1, pm_prepwstst_core_p - ldr r2, [r1] - and r2, r2, #0x3 - cmp r2, #0x0 @ Check if previous power state of CORE is OFF - bne omap3_restore @ Fall through to OMAP3 common code - /* Disable RTA before giving control */ - ldr r1, control_mem_rta - mov r2, #OMAP36XX_RTA_DISABLE - str r2, [r1] -ENDPROC(omap3_restore_3630) - - /* Fall through to common code for the remaining logic */ - -ENTRY(omap3_restore) - /* - * Read the pwstctrl register to check the reason for mpu reset. - * This tells us what was lost. - */ - ldr r1, pm_pwstctrl_mpu - ldr r2, [r1] - and r2, r2, #0x3 - cmp r2, #0x0 @ Check if target power state was OFF or RET - bne logic_l1_restore - - adr r1, l2dis_3630_offset @ address for offset - ldr r0, [r1] @ value for offset - ldr r0, [r1, r0] @ value at l2dis_3630 - cmp r0, #0x1 @ should we disable L2 on 3630? - bne skipl2dis - mrc p15, 0, r0, c1, c0, 1 - bic r0, r0, #2 @ disable L2 cache - mcr p15, 0, r0, c1, c0, 1 -skipl2dis: - ldr r0, control_stat - ldr r1, [r0] - and r1, #0x700 - cmp r1, #0x300 - beq l2_inv_gp - adr r0, l2_inv_api_params_offset - ldr r3, [r0] - add r3, r3, r0 @ r3 points to dummy parameters - mov r0, #40 @ set service ID for PPA - mov r12, r0 @ copy secure Service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - /* Write to Aux control register to set some bits */ - mov r0, #42 @ set service ID for PPA - mov r12, r0 @ copy secure Service ID in r12 - mov r1, #0 @ set task id for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - ldr r4, scratchpad_base - ldr r3, [r4, #0xBC] @ r3 points to parameters - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) - -#ifdef CONFIG_OMAP3_L2_AUX_SECURE_SAVE_RESTORE - /* Restore L2 aux control register */ - @ set service ID for PPA - mov r0, #CONFIG_OMAP3_L2_AUX_SECURE_SERVICE_SET_ID - mov r12, r0 @ copy service ID in r12 - mov r1, #0 @ set task ID for ROM code in r1 - mov r2, #4 @ set some flags in r2, r6 - mov r6, #0xff - ldr r4, scratchpad_base - ldr r3, [r4, #0xBC] - adds r3, r3, #8 @ r3 points to parameters - dsb @ data write barrier - dmb @ data memory barrier - smc #1 @ call SMI monitor (smi #1) -#endif - b logic_l1_restore - - .align -l2_inv_api_params_offset: - .long l2_inv_api_params - . 
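The copy_to_sram loop in omap3_restore_es3 above relocates es3_sdrc_fix into SRAM word by word before running it, since the SDRC cannot be reprogrammed from code fetched through the interface being fixed; the 'mov r2, r2, ror #2' turns the byte size into a word count (equivalent to a right shift for a 4-byte-aligned size). The loop itself is a plain word copy:

#include <stdint.h>

static void copy_words(const uint32_t *src, uint32_t *dst, unsigned int n)
{
	while (n--)
		*dst++ = *src++;	/* ldmia r0!, {r3}; stmia r1!, {r3} */
}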
-l2_inv_gp: - /* Execute smi to invalidate L2 cache */ - mov r12, #0x1 @ set up to invalidate L2 - smc #0 @ Call SMI monitor (smieq) - /* Write to Aux control register to set some bits */ - ldr r4, scratchpad_base - ldr r3, [r4,#0xBC] - ldr r0, [r3,#4] - mov r12, #0x3 - smc #0 @ Call SMI monitor (smieq) - ldr r4, scratchpad_base - ldr r3, [r4,#0xBC] - ldr r0, [r3,#12] - mov r12, #0x2 - smc #0 @ Call SMI monitor (smieq) -logic_l1_restore: - adr r0, l2dis_3630_offset @ adress for offset - ldr r1, [r0] @ value for offset - ldr r1, [r0, r1] @ value at l2dis_3630 - cmp r1, #0x1 @ Test if L2 re-enable needed on 3630 - bne skipl2reen - mrc p15, 0, r1, c1, c0, 1 - orr r1, r1, #2 @ re-enable L2 cache - mcr p15, 0, r1, c1, c0, 1 -skipl2reen: - - /* Now branch to the common CPU resume function */ - b cpu_resume -ENDPROC(omap3_restore) - - .ltorg - -/* - * Local variables - */ -pm_prepwstst_core_p: - .word PM_PREPWSTST_CORE_P -pm_pwstctrl_mpu: - .word PM_PWSTCTRL_MPU_P -scratchpad_base: - .word SCRATCHPAD_BASE_P -sram_base: - .word SRAM_BASE_P + 0x8000 -control_stat: - .word CONTROL_STAT -control_mem_rta: - .word CONTROL_MEM_RTA_CTRL -l2dis_3630_offset: - .long l2dis_3630 - . - - .data - .align 2 -l2dis_3630: - .word 0 - - .data - .align 2 -l2_inv_api_params: - .word 0x1, 0x00 - -/* - * Internal functions - */ - -/* - * This function implements the erratum ID i443 WA, applies to 34xx >= ES3.0 - * Copied to and run from SRAM in order to reconfigure the SDRC parameters. - */ - .text - .align 3 -ENTRY(es3_sdrc_fix) - ldr r4, sdrc_syscfg @ get config addr - ldr r5, [r4] @ get value - tst r5, #0x100 @ is part access blocked - it eq - biceq r5, r5, #0x100 @ clear bit if set - str r5, [r4] @ write back change - ldr r4, sdrc_mr_0 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_emr2_0 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_manual_0 @ get config addr - mov r5, #0x2 @ autorefresh command - str r5, [r4] @ kick off refreshes - ldr r4, sdrc_mr_1 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_emr2_1 @ get config addr - ldr r5, [r4] @ get value - str r5, [r4] @ write back change - ldr r4, sdrc_manual_1 @ get config addr - mov r5, #0x2 @ autorefresh command - str r5, [r4] @ kick off refreshes - bx lr - -/* - * Local variables - */ - .align -sdrc_syscfg: - .word SDRC_SYSCONFIG_P -sdrc_mr_0: - .word SDRC_MR_0_P -sdrc_emr2_0: - .word SDRC_EMR2_0_P -sdrc_manual_0: - .word SDRC_MANUAL_0_P -sdrc_mr_1: - .word SDRC_MR_1_P -sdrc_emr2_1: - .word SDRC_EMR2_1_P -sdrc_manual_1: - .word SDRC_MANUAL_1_P -ENDPROC(es3_sdrc_fix) -ENTRY(es3_sdrc_fix_sz) - .word . 
- es3_sdrc_fix diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S deleted file mode 100644 index 90d2907a2eb27eba605421c1252dd98caa228025..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep43xx.S +++ /dev/null @@ -1,493 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Low level suspend code for AM43XX SoCs - * - * Copyright (C) 2013-2018 Texas Instruments Incorporated - http://www.ti.com/ - * Dave Gerlach, Vaibhav Bedia - */ - -#include -#include -#include -#include -#include -#include - -#include "cm33xx.h" -#include "common.h" -#include "iomap.h" -#include "omap-secure.h" -#include "omap44xx.h" -#include "pm-asm-offsets.h" -#include "prm33xx.h" -#include "prcm43xx.h" - -/* replicated define because linux/bitops.h cannot be included in assembly */ -#define BIT(nr) (1 << (nr)) - -#define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED 0x00030000 -#define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE 0x0003 -#define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE 0x0002 - -#define AM43XX_EMIF_POWEROFF_ENABLE 0x1 -#define AM43XX_EMIF_POWEROFF_DISABLE 0x0 - -#define AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP 0x1 -#define AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO 0x3 - -#define AM43XX_CM_BASE 0x44DF0000 - -#define AM43XX_CM_REGADDR(inst, reg) \ - AM33XX_L4_WK_IO_ADDRESS(AM43XX_CM_BASE + (inst) + (reg)) - -#define AM43XX_CM_MPU_CLKSTCTRL AM43XX_CM_REGADDR(AM43XX_CM_MPU_INST, \ - AM43XX_CM_MPU_MPU_CDOFFS) -#define AM43XX_CM_MPU_MPU_CLKCTRL AM43XX_CM_REGADDR(AM43XX_CM_MPU_INST, \ - AM43XX_CM_MPU_MPU_CLKCTRL_OFFSET) -#define AM43XX_CM_PER_EMIF_CLKCTRL AM43XX_CM_REGADDR(AM43XX_CM_PER_INST, \ - AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET) -#define AM43XX_PRM_EMIF_CTRL_OFFSET 0x0030 - -#define RTC_SECONDS_REG 0x0 -#define RTC_PMIC_REG 0x98 -#define RTC_PMIC_POWER_EN BIT(16) -#define RTC_PMIC_EXT_WAKEUP_STS BIT(12) -#define RTC_PMIC_EXT_WAKEUP_POL BIT(4) -#define RTC_PMIC_EXT_WAKEUP_EN BIT(0) - - .arm - .arch armv7-a - .arch_extension sec - .align 3 - -ENTRY(am43xx_do_wfi) - stmfd sp!, {r4 - r11, lr} @ save registers on stack - - /* Save wfi_flags arg to data space */ - mov r4, r0 - adr r3, am43xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - str r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -#ifdef CONFIG_CACHE_L2X0 - /* Retrieve l2 cache virt address BEFORE we shut off EMIF */ - ldr r1, get_l2cache_base - blx r1 - mov r8, r0 -#endif - - /* Only flush cache is we know we are losing MPU context */ - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_flush - - /* - * Flush all data from the L1 and L2 data cache before disabling - * SCTLR.C bit. - */ - ldr r1, kernel_flush - blx r1 - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - dsb - - /* - * Invalidate L1 and L2 data cache. - */ - ldr r1, kernel_flush - blx r1 - -#ifdef CONFIG_CACHE_L2X0 - /* - * Clean and invalidate the L2 cache. 
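The clean-and-invalidate sequence that follows drives the PL310 by way: write the way mask to the CLEAN_INV_WAY register, poll until the controller has cleared every bit, then drain with CACHE_SYNC and poll that too. A sketch using the standard PL310 offsets; the way mask is presumably 0xffff for a fully populated 16-way cache (the asm loads it from l2_val):

#include <stdint.h>

#define L2X0_CLEAN_INV_WAY	0x7fc
#define L2X0_CACHE_SYNC		0x730

static void l2x0_clean_inv_all(volatile uint8_t *l2_base, uint32_t way_mask)
{
	volatile uint32_t *op =
		(volatile uint32_t *)(l2_base + L2X0_CLEAN_INV_WAY);
	volatile uint32_t *sync =
		(volatile uint32_t *)(l2_base + L2X0_CACHE_SYNC);

	*op = way_mask;
	while (*op & way_mask)
		;		/* ways still being processed */

	*sync = 0;
	while (*sync & 1)
		;		/* wait for the sync to drain */
}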
- */ -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x03 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif - mov r0, r8 - adr r4, am43xx_pm_ro_sram_data - ldr r3, [r4, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - - mov r2, r0 - ldr r0, [r2, #L2X0_AUX_CTRL] - str r0, [r3, #AMX3_PM_L2_AUX_CTRL_VAL_OFFSET] - ldr r0, [r2, #L310_PREFETCH_CTRL] - str r0, [r3, #AMX3_PM_L2_PREFETCH_CTRL_VAL_OFFSET] - - ldr r0, l2_val - str r0, [r2, #L2X0_CLEAN_INV_WAY] -wait: - ldr r0, [r2, #L2X0_CLEAN_INV_WAY] - ldr r1, l2_val - ands r0, r0, r1 - bne wait -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif -l2x_sync: - mov r0, r8 - mov r2, r0 - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync -#endif - - /* Restore wfi_flags */ - adr r3, am43xx_pm_ro_sram_data - ldr r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET] - ldr r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET] - -cache_skip_flush: - /* - * If we are trying to enter RTC+DDR mode we must perform - * a read from the rtc address space to ensure translation - * presence in the TLB to avoid page table walk after DDR - * is unavailable. - */ - tst r4, #WFI_FLAG_RTC_ONLY - beq skip_rtc_va_refresh - - adr r3, am43xx_pm_ro_sram_data - ldr r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET] - ldr r0, [r1] - -skip_rtc_va_refresh: - /* Check if we want self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_enter_sr - - adr r9, am43xx_emif_sram_table - - ldr r3, [r9, #EMIF_PM_ENTER_SR_OFFSET] - blx r3 - -emif_skip_enter_sr: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SAVE_EMIF - beq emif_skip_save - - ldr r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET] - blx r3 - -emif_skip_save: - /* Only can disable EMIF if we have entered self refresh */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_disable - - /* Disable EMIF */ - ldr r1, am43xx_virt_emif_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - -wait_emif_disable: - ldr r2, [r1] - mov r3, #AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED - cmp r2, r3 - bne wait_emif_disable - -emif_skip_disable: - tst r4, #WFI_FLAG_RTC_ONLY - beq skip_rtc_only - - adr r3, am43xx_pm_ro_sram_data - ldr r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET] - - ldr r0, [r1, #RTC_PMIC_REG] - orr r0, r0, #RTC_PMIC_POWER_EN - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_STS - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_EN - orr r0, r0, #RTC_PMIC_EXT_WAKEUP_POL - str r0, [r1, #RTC_PMIC_REG] - ldr r0, [r1, #RTC_PMIC_REG] - /* Wait for 2 seconds to lose power */ - mov r3, #2 - ldr r2, [r1, #RTC_SECONDS_REG] -rtc_loop: - ldr r0, [r1, #RTC_SECONDS_REG] - cmp r0, r2 - beq rtc_loop - mov r2, r0 - subs r3, r3, #1 - bne rtc_loop - - b re_enable_emif - -skip_rtc_only: - - tst r4, #WFI_FLAG_WAKE_M3 - beq wkup_m3_skip - - /* - * For the MPU WFI to be registered as an interrupt - * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set - * to DISABLED - */ - ldr r1, am43xx_virt_mpu_clkctrl - ldr r2, [r1] - bic r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE - str r2, [r1] - - /* - * Put MPU CLKDM to SW_SLEEP - */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP - str r2, [r1] - -wkup_m3_skip: - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * have completed. 
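- * The DSB below guarantees completion of those maintenance
- * operations; the DMB that follows additionally orders any
- * outstanding memory accesses before the WFI is issued.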
- */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Sixteen - * NOPs as per Cortex-A9 pipeline. - */ - wfi - - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - /* We come here in case of an abort due to a late interrupt */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO - str r2, [r1] - - /* Set MPU_CLKCTRL.MODULEMODE back to ENABLE */ - ldr r1, am43xx_virt_mpu_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] - -re_enable_emif: - /* Re-enable EMIF */ - ldr r1, am43xx_virt_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable - - tst r4, #WFI_FLAG_FLUSH_CACHE - beq cache_skip_restore - - /* - * Set SCTLR.C bit to allow data cache allocation - */ - mrc p15, 0, r0, c1, c0, 0 - orr r0, r0, #(1 << 2) @ Enable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - -cache_skip_restore: - /* Only necessary if PER is losing context */ - tst r4, #WFI_FLAG_SELF_REFRESH - beq emif_skip_exit_sr_abt - - adr r9, am43xx_emif_sram_table - ldr r1, [r9, #EMIF_PM_ABORT_SR_OFFSET] - blx r1 - -emif_skip_exit_sr_abt: - /* Let the suspend code know about the abort */ - mov r0, #1 - ldmfd sp!, {r4 - r11, pc} @ restore regs and return -ENDPROC(am43xx_do_wfi) - - .align -ENTRY(am43xx_resume_offset) - .word . - am43xx_do_wfi - -ENTRY(am43xx_resume_from_deep_sleep) - /* Set MPU CLKSTCTRL to HW AUTO so that CPUidle works properly */ - ldr r1, am43xx_virt_mpu_clkstctrl - mov r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_HW_AUTO - str r2, [r1] - - /* For AM43xx, use EMIF power down until context is restored */ - ldr r2, am43xx_phys_emif_poweroff - mov r1, #AM43XX_EMIF_POWEROFF_ENABLE - str r1, [r2, #0x0] - - /* Re-enable EMIF */ - ldr r1, am43xx_phys_emif_clkctrl - mov r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE - str r2, [r1] -wait_emif_enable1: - ldr r3, [r1] - cmp r2, r3 - bne wait_emif_enable1 - - adr r9, am43xx_emif_sram_table - - ldr r1, [r9, #EMIF_PM_RESTORE_CONTEXT_OFFSET] - blx r1 - - ldr r1, [r9, #EMIF_PM_EXIT_SR_OFFSET] - blx r1 - - ldr r2, am43xx_phys_emif_poweroff - mov r1, #AM43XX_EMIF_POWEROFF_DISABLE - str r1, [r2, #0x0] - - ldr r1, [r9, #EMIF_PM_RUN_HW_LEVELING] - blx r1 - -#ifdef CONFIG_CACHE_L2X0 - ldr r2, l2_cache_base - ldr r0, [r2, #L2X0_CTRL] - and r0, #0x0f - cmp r0, #1 - beq skip_l2en @ Skip if already enabled - - adr r4, am43xx_pm_ro_sram_data - ldr r3, [r4, #AMX3_PM_RO_SRAM_DATA_PHYS_OFFSET] - ldr r0, [r3, #AMX3_PM_L2_PREFETCH_CTRL_VAL_OFFSET] - - ldr r12, l2_smc1 - dsb - smc #0 - dsb -set_aux_ctrl: - ldr r0, [r3, #AMX3_PM_L2_AUX_CTRL_VAL_OFFSET] - ldr r12, l2_smc2 - dsb - smc #0 - dsb - - /* L2 invalidate on resume */ - ldr r0, l2_val - ldr r2, l2_cache_base - str r0, [r2, #L2X0_INV_WAY] -wait2: - ldr r0, [r2, #L2X0_INV_WAY] - ldr r1, l2_val - ands r0, r0, r1 - bne wait2 -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - dsb - smc #0 - dsb -#endif -l2x_sync2: - ldr r2, l2_cache_base - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync2: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync2 - - mov r0, #0x1 - ldr r12, l2_smc3 - dsb - smc #0 - dsb -#endif -skip_l2en: - /* We are back. 
Branch to the common CPU resume routine */ - mov r0, #0 - ldr pc, resume_addr -ENDPROC(am43xx_resume_from_deep_sleep) - -/* - * Local variables - */ - .align -kernel_flush: - .word v7_flush_dcache_all -ddr_start: - .word PAGE_OFFSET - -am43xx_phys_emif_poweroff: - .word (AM43XX_CM_BASE + AM43XX_PRM_DEVICE_INST + \ - AM43XX_PRM_EMIF_CTRL_OFFSET) -am43xx_virt_mpu_clkstctrl: - .word (AM43XX_CM_MPU_CLKSTCTRL) -am43xx_virt_mpu_clkctrl: - .word (AM43XX_CM_MPU_MPU_CLKCTRL) -am43xx_virt_emif_clkctrl: - .word (AM43XX_CM_PER_EMIF_CLKCTRL) -am43xx_phys_emif_clkctrl: - .word (AM43XX_CM_BASE + AM43XX_CM_PER_INST + \ - AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET) - -#ifdef CONFIG_CACHE_L2X0 -/* L2 cache related defines for AM437x */ -get_l2cache_base: - .word omap4_get_l2cache_base -l2_cache_base: - .word OMAP44XX_L2CACHE_BASE -l2_smc1: - .word OMAP4_MON_L2X0_PREFETCH_INDEX -l2_smc2: - .word OMAP4_MON_L2X0_AUXCTRL_INDEX -l2_smc3: - .word OMAP4_MON_L2X0_CTRL_INDEX -l2_val: - .word 0xffff -#endif - -.align 3 -/* DDR related defines */ -ENTRY(am43xx_emif_sram_table) - .space EMIF_PM_FUNCTIONS_SIZE - -ENTRY(am43xx_pm_sram) - .word am43xx_do_wfi - .word am43xx_do_wfi_sz - .word am43xx_resume_offset - .word am43xx_emif_sram_table - .word am43xx_pm_ro_sram_data - -resume_addr: - .word cpu_resume - PAGE_OFFSET + 0x80000000 -.align 3 - -ENTRY(am43xx_pm_ro_sram_data) - .space AMX3_PM_RO_SRAM_DATA_SIZE - -ENTRY(am43xx_do_wfi_sz) - .word . - am43xx_do_wfi diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S deleted file mode 100644 index f60f6a9aed7351532fbe0116a0fa7025e20dc367..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sleep44xx.S +++ /dev/null @@ -1,388 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * OMAP44xx sleep code. - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Santosh Shilimkar - */ - -#include -#include -#include -#include -#include - -#include "omap-secure.h" - -#include "common.h" -#include "omap44xx.h" -#include "omap4-sar-layout.h" - - .arch armv7-a - -#if defined(CONFIG_SMP) && defined(CONFIG_PM) - - .arch_extension sec -.macro DO_SMC - dsb - smc #0 - dsb -.endm - -#ifdef CONFIG_ARCH_OMAP4 - -/* - * ============================= - * == CPU suspend finisher == - * ============================= - * - * void omap4_finish_suspend(unsigned long cpu_state) - * - * This function code saves the CPU context and performs the CPU - * power down sequence. Calling WFI effectively changes the CPU - * power domains states to the desired target power state. - * - * @cpu_state : contains context save state (r0) - * 0 - No context lost - * 1 - CPUx L1 and logic lost: MPUSS CSWR - * 2 - CPUx L1 and logic lost + GIC lost: MPUSS OSWR - * 3 - CPUx L1 and logic lost + GIC + L2 lost: MPUSS OFF - * @return: This function never returns for CPU OFF and DORMANT power states. - * Post WFI, CPU transitions to DORMANT or OFF power state and on wake-up - * from this follows a full CPU reset path via ROM code to CPU restore code. - * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET. - * It returns to the caller for CPU INACTIVE and ON power states or in case - * CPU failed to transition to targeted OFF/DORMANT state. - * - * omap4_finish_suspend() calls v7_flush_dcache_all() which doesn't save - * stack frame and it expects the caller to take care of it. Hence the entire - * stack frame is saved to avoid possible stack corruption. 
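- * Illustrative call site (not part of this file): the MPUSS
- * low-power code hands this routine to the generic ARM suspend
- * helper, roughly
- *
- *	cpu_suspend(save_state, omap4_finish_suspend);
- *
- * with save_state being one of the 0..3 context-loss levels listed
- * above.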
- */ -ENTRY(omap4_finish_suspend) - stmfd sp!, {r4-r12, lr} - cmp r0, #0x0 - beq do_WFI @ No lowpower state, jump to WFI - - /* - * Flush all data from the L1 data cache before disabling - * SCTLR.C bit. - */ - bl omap4_get_sar_ram_base - ldr r9, [r0, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne skip_secure_l1_clean - mov r0, #SCU_PM_NORMAL - mov r1, #0xFF @ clean seucre L1 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} -skip_secure_l1_clean: - bl v7_flush_dcache_all - - /* - * Clear the SCTLR.C bit to prevent further data cache - * allocation. Clearing SCTLR.C would make all the data accesses - * strongly ordered and would not hit the cache. - */ - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #(1 << 2) @ Disable the C bit - mcr p15, 0, r0, c1, c0, 0 - isb - - bl v7_invalidate_l1 - - /* - * Switch the CPU from Symmetric Multiprocessing (SMP) mode - * to AsymmetricMultiprocessing (AMP) mode by programming - * the SCU power status to DORMANT or OFF mode. - * This enables the CPU to be taken out of coherency by - * preventing the CPU from receiving cache, TLB, or BTB - * maintenance operations broadcast by other CPUs in the cluster. - */ - bl omap4_get_sar_ram_base - mov r8, r0 - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne scu_gp_set - mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR - ands r0, r0, #0x0f - ldreq r0, [r8, #SCU_OFFSET0] - ldrne r0, [r8, #SCU_OFFSET1] - mov r1, #0x00 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} - b skip_scu_gp_set -scu_gp_set: - mrc p15, 0, r0, c0, c0, 5 @ Read MPIDR - ands r0, r0, #0x0f - ldreq r1, [r8, #SCU_OFFSET0] - ldrne r1, [r8, #SCU_OFFSET1] - bl omap4_get_scu_base - bl scu_power_mode -skip_scu_gp_set: - mrc p15, 0, r0, c1, c1, 2 @ Read NSACR data - tst r0, #(1 << 18) - mrcne p15, 0, r0, c1, c0, 1 - bicne r0, r0, #(1 << 6) @ Disable SMP bit - mcrne p15, 0, r0, c1, c0, 1 - isb - dsb -#ifdef CONFIG_CACHE_L2X0 - /* - * Clean and invalidate the L2 cache. - * Common cache-l2x0.c functions can't be used here since it - * uses spinlocks. We are out of coherency here with data cache - * disabled. The spinlock implementation uses exclusive load/store - * instruction which can fail without data cache being enabled. - * OMAP4 hardware doesn't support exclusive monitor which can - * overcome exclusive access issue. Because of this, CPU can - * lead to deadlock. - */ - bl omap4_get_sar_ram_base - mov r8, r0 - mrc p15, 0, r5, c0, c0, 5 @ Read MPIDR - ands r5, r5, #0x0f - ldreq r0, [r8, #L2X0_SAVE_OFFSET0] @ Retrieve L2 state from SAR - ldrne r0, [r8, #L2X0_SAVE_OFFSET1] @ memory. - cmp r0, #3 - bne do_WFI -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x03 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - DO_SMC -#endif - bl omap4_get_l2cache_base - mov r2, r0 - ldr r0, =0xffff - str r0, [r2, #L2X0_CLEAN_INV_WAY] -wait: - ldr r0, [r2, #L2X0_CLEAN_INV_WAY] - ldr r1, =0xffff - ands r0, r0, r1 - bne wait -#ifdef CONFIG_PL310_ERRATA_727915 - mov r0, #0x00 - mov r12, #OMAP4_MON_L2X0_DBG_CTRL_INDEX - DO_SMC -#endif -l2x_sync: - bl omap4_get_l2cache_base - mov r2, r0 - mov r0, #0x0 - str r0, [r2, #L2X0_CACHE_SYNC] -sync: - ldr r0, [r2, #L2X0_CACHE_SYNC] - ands r0, r0, #0x1 - bne sync -#endif - -do_WFI: - bl omap_do_wfi - - /* - * CPU is here when it failed to enter OFF/DORMANT or - * no low power state was attempted. - */ - mrc p15, 0, r0, c1, c0, 0 - tst r0, #(1 << 2) @ Check C bit enabled? 
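- @ If the C bit is already set, the conditional orreq/mcreq below
- @ are skipped; otherwise they re-enable the data cache before the
- @ CPU rejoins coherency.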
- orreq r0, r0, #(1 << 2) @ Enable the C bit - mcreq p15, 0, r0, c1, c0, 0 - isb - - /* - * Ensure the CPU power state is set to NORMAL in - * SCU power state so that CPU is back in coherency. - * In non-coherent mode CPU can lock-up and lead to - * system deadlock. - */ - mrc p15, 0, r0, c1, c0, 1 - tst r0, #(1 << 6) @ Check SMP bit enabled? - orreq r0, r0, #(1 << 6) - mcreq p15, 0, r0, c1, c0, 1 - isb - bl omap4_get_sar_ram_base - mov r8, r0 - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Check for HS device - bne scu_gp_clear - mov r0, #SCU_PM_NORMAL - mov r1, #0x00 - stmfd r13!, {r4-r12, r14} - ldr r12, =OMAP4_MON_SCU_PWR_INDEX - DO_SMC - ldmfd r13!, {r4-r12, r14} - b skip_scu_gp_clear -scu_gp_clear: - bl omap4_get_scu_base - mov r1, #SCU_PM_NORMAL - bl scu_power_mode -skip_scu_gp_clear: - isb - dsb - ldmfd sp!, {r4-r12, pc} -ENDPROC(omap4_finish_suspend) - -/* - * ============================ - * == CPU resume entry point == - * ============================ - * - * void omap4_cpu_resume(void) - * - * ROM code jumps to this function while waking up from CPU - * OFF or DORMANT state. Physical address of the function is - * stored in the SAR RAM while entering to OFF or DORMANT mode. - * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET. - */ -ENTRY(omap4_cpu_resume) - /* - * Configure ACTRL and enable NS SMP bit access on CPU1 on HS device. - * OMAP44XX EMU/HS devices - CPU0 SMP bit access is enabled in PPA - * init and for CPU1, a secure PPA API provided. CPU0 must be ON - * while executing NS_SMP API on CPU1 and PPA version must be 1.4.0+. - * OMAP443X GP devices- SMP bit isn't accessible. - * OMAP446X GP devices - SMP bit access is enabled on both CPUs. - */ - ldr r8, =OMAP44XX_SAR_RAM_BASE - ldr r9, [r8, #OMAP_TYPE_OFFSET] - cmp r9, #0x1 @ Skip if GP device - bne skip_ns_smp_enable - mrc p15, 0, r0, c0, c0, 5 - ands r0, r0, #0x0f - beq skip_ns_smp_enable -ppa_actrl_retry: - mov r0, #OMAP4_PPA_CPU_ACTRL_SMP_INDEX - adr r1, ppa_zero_params_offset - ldr r3, [r1] - add r3, r3, r1 @ Pointer to ppa_zero_params - mov r1, #0x0 @ Process ID - mov r2, #0x4 @ Flag - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - DO_SMC - cmp r0, #0x0 @ API returns 0 on success. - beq enable_smp_bit - b ppa_actrl_retry -enable_smp_bit: - mrc p15, 0, r0, c1, c0, 1 - tst r0, #(1 << 6) @ Check SMP bit enabled? - orreq r0, r0, #(1 << 6) - mcreq p15, 0, r0, c1, c0, 1 - isb -skip_ns_smp_enable: -#ifdef CONFIG_CACHE_L2X0 - /* - * Restore the L2 AUXCTRL and enable the L2 cache. - * OMAP4_MON_L2X0_AUXCTRL_INDEX = Program the L2X0 AUXCTRL - * OMAP4_MON_L2X0_CTRL_INDEX = Enable the L2 using L2X0 CTRL - * register r0 contains value to be programmed. - * L2 cache is already invalidate by ROM code as part - * of MPUSS OFF wakeup path. 
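- * All of the monitor calls below follow the same convention: the
- * value to program is loaded into r0, the secure service index into
- * r12, and the DO_SMC macro wraps the smc between two dsb barriers.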
- */ - ldr r2, =OMAP44XX_L2CACHE_BASE - ldr r0, [r2, #L2X0_CTRL] - and r0, #0x0f - cmp r0, #1 - beq skip_l2en @ Skip if already enabled - ldr r3, =OMAP44XX_SAR_RAM_BASE - ldr r1, [r3, #OMAP_TYPE_OFFSET] - cmp r1, #0x1 @ Check for HS device - bne set_gp_por - ldr r0, =OMAP4_PPA_L2_POR_INDEX - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r4, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - adr r1, ppa_por_params_offset - ldr r3, [r1] - add r3, r3, r1 @ Pointer to ppa_por_params - str r4, [r3, #0x04] - mov r1, #0x0 @ Process ID - mov r2, #0x4 @ Flag - mov r6, #0xff - mov r12, #0x00 @ Secure Service ID - DO_SMC - b set_aux_ctrl -set_gp_por: - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r0, [r1, #L2X0_PREFETCH_CTRL_OFFSET] - ldr r12, =OMAP4_MON_L2X0_PREFETCH_INDEX @ Setup L2 PREFETCH - DO_SMC -set_aux_ctrl: - ldr r1, =OMAP44XX_SAR_RAM_BASE - ldr r0, [r1, #L2X0_AUXCTRL_OFFSET] - ldr r12, =OMAP4_MON_L2X0_AUXCTRL_INDEX @ Setup L2 AUXCTRL - DO_SMC - mov r0, #0x1 - ldr r12, =OMAP4_MON_L2X0_CTRL_INDEX @ Enable L2 cache - DO_SMC -skip_l2en: -#endif - - b cpu_resume @ Jump to generic resume -ppa_por_params_offset: - .long ppa_por_params - . -ENDPROC(omap4_cpu_resume) -#endif /* CONFIG_ARCH_OMAP4 */ - -#endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ - -ENTRY(omap_do_wfi) - stmfd sp!, {lr} -#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER - /* Drain interconnect write buffers. */ - bl omap_interconnect_sync -#endif - - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * by any CPU in the cluster have completed. - */ - dsb - dmb - - /* - * Execute a WFI instruction and wait until the - * STANDBYWFI output is asserted to indicate that the - * CPU is in idle and low power state. CPU can specualatively - * prefetch the instructions so add NOPs after WFI. Sixteen - * NOPs as per Cortex-A9 pipeline. - */ - wfi @ Wait For Interrupt - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - nop - - ldmfd sp!, {pc} -ppa_zero_params_offset: - .long ppa_zero_params - . -ENDPROC(omap_do_wfi) - - .data - .align 2 -ppa_zero_params: - .word 0 - -ppa_por_params: - .word 1, 0 diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S deleted file mode 100644 index 92ef21ac2ac151ddd3b3ed0c2e0cffc991a7ee1b..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sram242x.S +++ /dev/null @@ -1,317 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sram242x.S - * - * Omap2 specific functions that need to be run in internal SRAM - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * Richard Woodruff notes that any changes to this code must be carefully - * audited and tested to ensure that they don't cause a TLB miss while - * the SDRAM is inaccessible. Such a situation will crash the system - * since it will cause the ARM MMU to attempt to walk the page tables. - * These crashes may be intermittent. 
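- * For context (a sketch based on the OMAP2 SRAM plumbing, not part
- * of this file): each routine below is copied into on-chip SRAM at
- * init time and invoked through a function pointer, roughly
- *
- *	_omap2_sram_ddr_init = omap_sram_push(omap242x_sram_ddr_init,
- *					      omap242x_sram_ddr_init_sz);
- *
- * which is why every ENTRY here is paired with a size word computed
- * as '. - entry_point'.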
- */ -#include - -#include - -#include "soc.h" -#include "iomap.h" -#include "prm2xxx.h" -#include "cm2xxx.h" -#include "sdrc.h" - - .text - - .align 3 -ENTRY(omap242x_sram_ddr_init) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r12, r2 @ capture CS1 vs CS0 - mov r8, r3 @ capture force parameter - - /* frequency shift down */ - ldr r2, omap242x_sdi_cm_clksel2_pll @ get address of dpllout reg - mov r3, #0x1 @ value for 1x operation - str r3, [r2] @ go to L1-freq operation - - /* voltage shift down */ - mov r9, #0x1 @ set up for L1 voltage call - bl voltage_shift @ go drop voltage - - /* dll lock mode */ - ldr r11, omap242x_sdi_sdrc_dlla_ctrl @ addr of dlla ctrl - ldr r10, [r11] @ get current val - cmp r12, #0x1 @ cs1 base (2422 es2.05/1) - addeq r11, r11, #0x8 @ if cs1 base, move to DLLB - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode. - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - orr r10, r10, #0x2 @ 90 degree phase for all below 133MHz - str r10, [r11] @ commit to DLLA_CTRL - bl i_dll_wait @ wait for dll to lock - - /* get dll value */ - add r11, r11, #0x4 @ get addr of status reg - ldr r10, [r11] @ get locked value - - /* voltage shift up */ - mov r9, #0x0 @ shift back to L0-voltage - bl voltage_shift @ go raise voltage - - /* frequency shift up */ - mov r3, #0x2 @ value for 2x operation - str r3, [r2] @ go to L0-freq operation - - /* reset entry mode for dllctrl */ - sub r11, r11, #0x4 @ move from status to ctrl - cmp r12, #0x1 @ normalize if cs1 based - subeq r11, r11, #0x8 @ possibly back to DLLA - cmp r8, #0x1 @ if forced unlock exit - orreq r1, r1, #0x4 @ make sure exit with unlocked value - str r1, [r11] @ restore DLLA_CTRL high value - add r11, r11, #0x8 @ move to DLLB_CTRL addr - str r1, [r11] @ set value DLLB_CTRL - bl i_dll_wait @ wait for possible lock - - /* set up for return, DDR should be good */ - str r10, [r0] @ write dll_status and return counter - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - - /* ensure the DLL has relocked */ -i_dll_wait: - mov r4, #0x800 @ delay DLL relock, min 0x400 L3 clocks -i_dll_delay: - subs r4, r4, #0x1 - bne i_dll_delay - ret lr - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift: - ldr r4, omap242x_sdi_prcm_voltctrl @ get addr of volt ctrl. - ldr r5, [r4] @ get value. - ldr r6, prcm_mask_val @ get value of mask - and r5, r5, r6 @ apply mask to clear bits - orr r5, r5, r9 @ bulld value for L0/L1-volt operation. - str r5, [r4] @ set up for change. - mov r3, #0x4000 @ get val for force - orr r5, r5, r3 @ build value for force - str r5, [r4] @ Force transition to L1 - - ldr r3, omap242x_sdi_timer_32ksynct_cr @ get addr of counter - ldr r5, [r3] @ get value - add r5, r5, #0x3 @ give it at most 93uS -volt_delay: - ldr r7, [r3] @ get timer value - cmp r5, r7 @ time up? - bhi volt_delay @ not yet->branch - ret lr @ back to caller. - -omap242x_sdi_cm_clksel2_pll: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap242x_sdi_sdrc_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap242x_sdi_prcm_voltctrl: - .word OMAP2420_PRCM_VOLTCTRL -prcm_mask_val: - .word 0xFFFF3FFC -omap242x_sdi_timer_32ksynct_cr: - .word OMAP2_L4_IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010) -ENTRY(omap242x_sram_ddr_init_sz) - .word . - omap242x_sram_ddr_init - -/* - * Reprograms memory timings. 
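- * Seen from C, the equivalent prototype is roughly (an assumed
- * declaration for illustration):
- *
- *	void omap242x_sram_reprogram_sdrc(u32 perf_level, u32 dll_val,
- *					  u32 mem_type);
- *
- * with the arguments mapping onto r0..r2 as follows: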
- * r0 = [PRCM_FULL | PRCM_HALF] r1 = SDRC_DLLA_CTRL value r2 = [DDR | SDR] - * PRCM_FULL = 2, PRCM_HALF = 1, DDR = 1, SDR = 0 - */ - .align 3 -ENTRY(omap242x_sram_reprogram_sdrc) - stmfd sp!, {r0 - r10, lr} @ save registers on stack - mov r3, #0x0 @ clear for mrc call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, finish ARM SDR/DDR - nop - nop - ldr r6, omap242x_srs_sdrc_rfr_ctrl @ get addr of refresh reg - ldr r5, [r6] @ get value - mov r5, r5, lsr #8 @ isolate rfr field and drop burst - - cmp r0, #0x1 @ going to half speed? - movne r9, #0x0 @ if up set flag up for pre up, hi volt - - blne voltage_shift_c @ adjust voltage - - cmp r0, #0x1 @ going to half speed (post branch link) - moveq r5, r5, lsr #1 @ divide by 2 if to half - movne r5, r5, lsl #1 @ mult by 2 if to full - mov r5, r5, lsl #8 @ put rfr field back into place - add r5, r5, #0x1 @ turn on burst of 1 - ldr r4, omap242x_srs_cm_clksel2_pll @ get address of out reg - ldr r3, [r4] @ get curr value - orr r3, r3, #0x3 - bic r3, r3, #0x3 @ clear lower bits - orr r3, r3, r0 @ new state value - str r3, [r4] @ set new state (pll/x, x=1 or 2) - nop - nop - - moveq r9, #0x1 @ if speed down, post down, drop volt - bleq voltage_shift_c - - mcr p15, 0, r3, c7, c10, 4 @ memory barrier - str r5, [r6] @ set new RFR_1 value - add r6, r6, #0x30 @ get RFR_2 addr - str r5, [r6] @ set RFR_2 - nop - cmp r2, #0x1 @ (SDR or DDR) do we need to adjust DLL - bne freq_out @ leave if SDR, no DLL function - - /* With DDR, we need to take care of the DLL for the frequency change */ - ldr r2, omap242x_srs_sdrc_dlla_ctrl @ addr of dlla ctrl - str r1, [r2] @ write out new SDRC_DLLA_CTRL - add r2, r2, #0x8 @ addr to SDRC_DLLB_CTRL - str r1, [r2] @ commit to SDRC_DLLB_CTRL - mov r1, #0x2000 @ wait DLL relock, min 0x400 L3 clocks -dll_wait: - subs r1, r1, #0x1 - bne dll_wait -freq_out: - ldmfd sp!, {r0 - r10, pc} @ restore regs and return - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift_c: - ldr r10, omap242x_srs_prcm_voltctrl @ get addr of volt ctrl - ldr r8, [r10] @ get value - ldr r7, ddr_prcm_mask_val @ get value of mask - and r8, r8, r7 @ apply mask to clear bits - orr r8, r8, r9 @ bulld value for L0/L1-volt operation. - str r8, [r10] @ set up for change. - mov r7, #0x4000 @ get val for force - orr r8, r8, r7 @ build value for force - str r8, [r10] @ Force transition to L1 - - ldr r10, omap242x_srs_timer_32ksynct @ get addr of counter - ldr r8, [r10] @ get value - add r8, r8, #0x2 @ give it at most 62uS (min 31+) -volt_delay_c: - ldr r7, [r10] @ get timer value - cmp r8, r7 @ time up? - bhi volt_delay_c @ not yet->branch - ret lr @ back to caller - -omap242x_srs_cm_clksel2_pll: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap242x_srs_sdrc_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap242x_srs_sdrc_rfr_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap242x_srs_prcm_voltctrl: - .word OMAP2420_PRCM_VOLTCTRL -ddr_prcm_mask_val: - .word 0xFFFF3FFC -omap242x_srs_timer_32ksynct: - .word OMAP2_L4_IO_ADDRESS(OMAP2420_32KSYNCT_BASE + 0x010) - -ENTRY(omap242x_sram_reprogram_sdrc_sz) - .word . - omap242x_sram_reprogram_sdrc - -/* - * Set dividers and pll. Also recalculate DLL value for DDR and unlock mode. 
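- * The pbegin/pend labels below bracket the code that must run from
- * the I-cache while the DPLL is dropped into bypass and relocked;
- * the mcrr at entry preloads that whole range so that no instruction
- * fetch has to touch SDRAM mid-sequence.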
- */ - .align 3 -ENTRY(omap242x_sram_set_prcm) - stmfd sp!, {r0-r12, lr} @ regs to stack - adr r4, pbegin @ addr of preload start - adr r8, pend @ addr of preload end - mcrr p15, 1, r8, r4, c12 @ preload into icache -pbegin: - /* move into fast relock bypass */ - ldr r8, omap242x_ssp_pll_ctl @ get addr - ldr r5, [r8] @ get val - mvn r6, #0x3 @ clear mask - and r5, r5, r6 @ clear field - orr r7, r5, #0x2 @ fast relock val - str r7, [r8] @ go to fast relock - ldr r4, omap242x_ssp_pll_stat @ addr of stat -block: - /* wait for bypass */ - ldr r8, [r4] @ stat value - and r8, r8, #0x3 @ mask for stat - cmp r8, #0x1 @ there yet - bne block @ loop if not - - /* set new dpll dividers _after_ in bypass */ - ldr r4, omap242x_ssp_pll_div @ get addr - str r0, [r4] @ set dpll ctrl val - - ldr r4, omap242x_ssp_set_config @ get addr - mov r8, #1 @ valid cfg msk - str r8, [r4] @ make dividers take - - mov r4, #100 @ dead spin a bit -wait_a_bit: - subs r4, r4, #1 @ dec loop - bne wait_a_bit @ delay done? - - /* check if staying in bypass */ - cmp r2, #0x1 @ stay in bypass? - beq pend @ jump over dpll relock - - /* relock DPLL with new vals */ - ldr r5, omap242x_ssp_pll_stat @ get addr - ldr r4, omap242x_ssp_pll_ctl @ get addr - orr r8, r7, #0x3 @ val for lock dpll - str r8, [r4] @ set val - mov r0, #1000 @ dead spin a bit -wait_more: - subs r0, r0, #1 @ dec loop - bne wait_more @ delay done? -wait_lock: - ldr r8, [r5] @ get lock val - and r8, r8, #3 @ isolate field - cmp r8, #2 @ locked? - bne wait_lock @ wait if not -pend: - /* update memory timings & briefly lock dll */ - ldr r4, omap242x_ssp_sdrc_rfr @ get addr - str r1, [r4] @ update refresh timing - ldr r11, omap242x_ssp_dlla_ctrl @ get addr of DLLA ctrl - ldr r10, [r11] @ get current val - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - str r10, [r11] @ commit to DLLA_CTRL - add r11, r11, #0x8 @ move to dllb - str r10, [r11] @ hit DLLB also - - mov r4, #0x800 @ relock time (min 0x400 L3 clocks) -wait_dll_lock: - subs r4, r4, #0x1 - bne wait_dll_lock - nop - ldmfd sp!, {r0-r12, pc} @ restore regs and return - -omap242x_ssp_set_config: - .word OMAP2420_PRCM_CLKCFG_CTRL -omap242x_ssp_pll_ctl: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKEN) -omap242x_ssp_pll_stat: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_IDLEST) -omap242x_ssp_pll_div: - .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL1) -omap242x_ssp_sdrc_rfr: - .word OMAP242X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap242x_ssp_dlla_ctrl: - .word OMAP242X_SDRC_REGADDR(SDRC_DLLA_CTRL) - -ENTRY(omap242x_sram_set_prcm_sz) - .word . - omap242x_sram_set_prcm diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S deleted file mode 100644 index faf03b7f08f5c6f3efaf2ad91a6d97034b5e28db..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/sram243x.S +++ /dev/null @@ -1,317 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mach-omap2/sram243x.S - * - * Omap2 specific functions that need to be run in internal SRAM - * - * (C) Copyright 2004 - * Texas Instruments, - * Richard Woodruff - * - * Richard Woodruff notes that any changes to this code must be carefully - * audited and tested to ensure that they don't cause a TLB miss while - * the SDRAM is inaccessible. Such a situation will crash the system - * since it will cause the ARM MMU to attempt to walk the page tables. - * These crashes may be intermittent. 
- */ -#include - -#include - -#include "soc.h" -#include "iomap.h" -#include "prm2xxx.h" -#include "cm2xxx.h" -#include "sdrc.h" - - .text - - .align 3 -ENTRY(omap243x_sram_ddr_init) - stmfd sp!, {r0 - r12, lr} @ save registers on stack - - mov r12, r2 @ capture CS1 vs CS0 - mov r8, r3 @ capture force parameter - - /* frequency shift down */ - ldr r2, omap243x_sdi_cm_clksel2_pll @ get address of dpllout reg - mov r3, #0x1 @ value for 1x operation - str r3, [r2] @ go to L1-freq operation - - /* voltage shift down */ - mov r9, #0x1 @ set up for L1 voltage call - bl voltage_shift @ go drop voltage - - /* dll lock mode */ - ldr r11, omap243x_sdi_sdrc_dlla_ctrl @ addr of dlla ctrl - ldr r10, [r11] @ get current val - cmp r12, #0x1 @ cs1 base (2422 es2.05/1) - addeq r11, r11, #0x8 @ if cs1 base, move to DLLB - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode. - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - orr r10, r10, #0x2 @ 90 degree phase for all below 133MHz - str r10, [r11] @ commit to DLLA_CTRL - bl i_dll_wait @ wait for dll to lock - - /* get dll value */ - add r11, r11, #0x4 @ get addr of status reg - ldr r10, [r11] @ get locked value - - /* voltage shift up */ - mov r9, #0x0 @ shift back to L0-voltage - bl voltage_shift @ go raise voltage - - /* frequency shift up */ - mov r3, #0x2 @ value for 2x operation - str r3, [r2] @ go to L0-freq operation - - /* reset entry mode for dllctrl */ - sub r11, r11, #0x4 @ move from status to ctrl - cmp r12, #0x1 @ normalize if cs1 based - subeq r11, r11, #0x8 @ possibly back to DLLA - cmp r8, #0x1 @ if forced unlock exit - orreq r1, r1, #0x4 @ make sure exit with unlocked value - str r1, [r11] @ restore DLLA_CTRL high value - add r11, r11, #0x8 @ move to DLLB_CTRL addr - str r1, [r11] @ set value DLLB_CTRL - bl i_dll_wait @ wait for possible lock - - /* set up for return, DDR should be good */ - str r10, [r0] @ write dll_status and return counter - ldmfd sp!, {r0 - r12, pc} @ restore regs and return - - /* ensure the DLL has relocked */ -i_dll_wait: - mov r4, #0x800 @ delay DLL relock, min 0x400 L3 clocks -i_dll_delay: - subs r4, r4, #0x1 - bne i_dll_delay - ret lr - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift: - ldr r4, omap243x_sdi_prcm_voltctrl @ get addr of volt ctrl. - ldr r5, [r4] @ get value. - ldr r6, prcm_mask_val @ get value of mask - and r5, r5, r6 @ apply mask to clear bits - orr r5, r5, r9 @ bulld value for L0/L1-volt operation. - str r5, [r4] @ set up for change. - mov r3, #0x4000 @ get val for force - orr r5, r5, r3 @ build value for force - str r5, [r4] @ Force transition to L1 - - ldr r3, omap243x_sdi_timer_32ksynct_cr @ get addr of counter - ldr r5, [r3] @ get value - add r5, r5, #0x3 @ give it at most 93uS -volt_delay: - ldr r7, [r3] @ get timer value - cmp r5, r7 @ time up? - bhi volt_delay @ not yet->branch - ret lr @ back to caller. - -omap243x_sdi_cm_clksel2_pll: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap243x_sdi_sdrc_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap243x_sdi_prcm_voltctrl: - .word OMAP2430_PRCM_VOLTCTRL -prcm_mask_val: - .word 0xFFFF3FFC -omap243x_sdi_timer_32ksynct_cr: - .word OMAP2_L4_IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010) -ENTRY(omap243x_sram_ddr_init_sz) - .word . - omap243x_sram_ddr_init - -/* - * Reprograms memory timings. 
- * r0 = [PRCM_FULL | PRCM_HALF] r1 = SDRC_DLLA_CTRL value r2 = [DDR | SDR] - * PRCM_FULL = 2, PRCM_HALF = 1, DDR = 1, SDR = 0 - */ - .align 3 -ENTRY(omap243x_sram_reprogram_sdrc) - stmfd sp!, {r0 - r10, lr} @ save registers on stack - mov r3, #0x0 @ clear for mrc call - mcr p15, 0, r3, c7, c10, 4 @ memory barrier, finish ARM SDR/DDR - nop - nop - ldr r6, omap243x_srs_sdrc_rfr_ctrl @ get addr of refresh reg - ldr r5, [r6] @ get value - mov r5, r5, lsr #8 @ isolate rfr field and drop burst - - cmp r0, #0x1 @ going to half speed? - movne r9, #0x0 @ if up set flag up for pre up, hi volt - - blne voltage_shift_c @ adjust voltage - - cmp r0, #0x1 @ going to half speed (post branch link) - moveq r5, r5, lsr #1 @ divide by 2 if to half - movne r5, r5, lsl #1 @ mult by 2 if to full - mov r5, r5, lsl #8 @ put rfr field back into place - add r5, r5, #0x1 @ turn on burst of 1 - ldr r4, omap243x_srs_cm_clksel2_pll @ get address of out reg - ldr r3, [r4] @ get curr value - orr r3, r3, #0x3 - bic r3, r3, #0x3 @ clear lower bits - orr r3, r3, r0 @ new state value - str r3, [r4] @ set new state (pll/x, x=1 or 2) - nop - nop - - moveq r9, #0x1 @ if speed down, post down, drop volt - bleq voltage_shift_c - - mcr p15, 0, r3, c7, c10, 4 @ memory barrier - str r5, [r6] @ set new RFR_1 value - add r6, r6, #0x30 @ get RFR_2 addr - str r5, [r6] @ set RFR_2 - nop - cmp r2, #0x1 @ (SDR or DDR) do we need to adjust DLL - bne freq_out @ leave if SDR, no DLL function - - /* With DDR, we need to take care of the DLL for the frequency change */ - ldr r2, omap243x_srs_sdrc_dlla_ctrl @ addr of dlla ctrl - str r1, [r2] @ write out new SDRC_DLLA_CTRL - add r2, r2, #0x8 @ addr to SDRC_DLLB_CTRL - str r1, [r2] @ commit to SDRC_DLLB_CTRL - mov r1, #0x2000 @ wait DLL relock, min 0x400 L3 clocks -dll_wait: - subs r1, r1, #0x1 - bne dll_wait -freq_out: - ldmfd sp!, {r0 - r10, pc} @ restore regs and return - - /* - * shift up or down voltage, use R9 as input to tell level. - * wait for it to finish, use 32k sync counter, 1tick=31uS. - */ -voltage_shift_c: - ldr r10, omap243x_srs_prcm_voltctrl @ get addr of volt ctrl - ldr r8, [r10] @ get value - ldr r7, ddr_prcm_mask_val @ get value of mask - and r8, r8, r7 @ apply mask to clear bits - orr r8, r8, r9 @ bulld value for L0/L1-volt operation. - str r8, [r10] @ set up for change. - mov r7, #0x4000 @ get val for force - orr r8, r8, r7 @ build value for force - str r8, [r10] @ Force transition to L1 - - ldr r10, omap243x_srs_timer_32ksynct @ get addr of counter - ldr r8, [r10] @ get value - add r8, r8, #0x2 @ give it at most 62uS (min 31+) -volt_delay_c: - ldr r7, [r10] @ get timer value - cmp r8, r7 @ time up? - bhi volt_delay_c @ not yet->branch - ret lr @ back to caller - -omap243x_srs_cm_clksel2_pll: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) -omap243x_srs_sdrc_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) -omap243x_srs_sdrc_rfr_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap243x_srs_prcm_voltctrl: - .word OMAP2430_PRCM_VOLTCTRL -ddr_prcm_mask_val: - .word 0xFFFF3FFC -omap243x_srs_timer_32ksynct: - .word OMAP2_L4_IO_ADDRESS(OMAP2430_32KSYNCT_BASE + 0x010) - -ENTRY(omap243x_sram_reprogram_sdrc_sz) - .word . - omap243x_sram_reprogram_sdrc - -/* - * Set dividers and pll. Also recalculate DLL value for DDR and unlock mode. 
- */ - .align 3 -ENTRY(omap243x_sram_set_prcm) - stmfd sp!, {r0-r12, lr} @ regs to stack - adr r4, pbegin @ addr of preload start - adr r8, pend @ addr of preload end - mcrr p15, 1, r8, r4, c12 @ preload into icache -pbegin: - /* move into fast relock bypass */ - ldr r8, omap243x_ssp_pll_ctl @ get addr - ldr r5, [r8] @ get val - mvn r6, #0x3 @ clear mask - and r5, r5, r6 @ clear field - orr r7, r5, #0x2 @ fast relock val - str r7, [r8] @ go to fast relock - ldr r4, omap243x_ssp_pll_stat @ addr of stat -block: - /* wait for bypass */ - ldr r8, [r4] @ stat value - and r8, r8, #0x3 @ mask for stat - cmp r8, #0x1 @ there yet - bne block @ loop if not - - /* set new dpll dividers _after_ in bypass */ - ldr r4, omap243x_ssp_pll_div @ get addr - str r0, [r4] @ set dpll ctrl val - - ldr r4, omap243x_ssp_set_config @ get addr - mov r8, #1 @ valid cfg msk - str r8, [r4] @ make dividers take - - mov r4, #100 @ dead spin a bit -wait_a_bit: - subs r4, r4, #1 @ dec loop - bne wait_a_bit @ delay done? - - /* check if staying in bypass */ - cmp r2, #0x1 @ stay in bypass? - beq pend @ jump over dpll relock - - /* relock DPLL with new vals */ - ldr r5, omap243x_ssp_pll_stat @ get addr - ldr r4, omap243x_ssp_pll_ctl @ get addr - orr r8, r7, #0x3 @ val for lock dpll - str r8, [r4] @ set val - mov r0, #1000 @ dead spin a bit -wait_more: - subs r0, r0, #1 @ dec loop - bne wait_more @ delay done? -wait_lock: - ldr r8, [r5] @ get lock val - and r8, r8, #3 @ isolate field - cmp r8, #2 @ locked? - bne wait_lock @ wait if not -pend: - /* update memory timings & briefly lock dll */ - ldr r4, omap243x_ssp_sdrc_rfr @ get addr - str r1, [r4] @ update refresh timing - ldr r11, omap243x_ssp_dlla_ctrl @ get addr of DLLA ctrl - ldr r10, [r11] @ get current val - mvn r9, #0x4 @ mask to get clear bit2 - and r10, r10, r9 @ clear bit2 for lock mode - orr r10, r10, #0x8 @ make sure DLL on (es2 bit pos) - str r10, [r11] @ commit to DLLA_CTRL - add r11, r11, #0x8 @ move to dllb - str r10, [r11] @ hit DLLB also - - mov r4, #0x800 @ relock time (min 0x400 L3 clocks) -wait_dll_lock: - subs r4, r4, #0x1 - bne wait_dll_lock - nop - ldmfd sp!, {r0-r12, pc} @ restore regs and return - -omap243x_ssp_set_config: - .word OMAP2430_PRCM_CLKCFG_CTRL -omap243x_ssp_pll_ctl: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKEN) -omap243x_ssp_pll_stat: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_IDLEST) -omap243x_ssp_pll_div: - .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL1) -omap243x_ssp_sdrc_rfr: - .word OMAP243X_SDRC_REGADDR(SDRC_RFR_CTRL_0) -omap243x_ssp_dlla_ctrl: - .word OMAP243X_SDRC_REGADDR(SDRC_DLLA_CTRL) - -ENTRY(omap243x_sram_set_prcm_sz) - .word . - omap243x_sram_set_prcm diff --git a/arch/arm/mach-oxnas/headsmp.S b/arch/arm/mach-oxnas/headsmp.S deleted file mode 100644 index 9c0f1479f33a3db6c4f8a00732fecb77319aee79..0000000000000000000000000000000000000000 --- a/arch/arm/mach-oxnas/headsmp.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 Ma Haijun - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include - - __INIT - -/* - * OX820 specific entry point for secondary CPUs. 
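- * The stub below invalidates the I/D caches and the branch target
- * cache via CP15 c7, since nothing can be assumed about their state
- * when the core leaves reset, and then falls through to the generic
- * secondary_startup.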
- */ -ENTRY(ox820_secondary_startup) - mov r4, #0 - /* invalidate both caches and branch target cache */ - mcr p15, 0, r4, c7, c7, 0 - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S deleted file mode 100644 index 88ea1243942ab244400e69bd3ef31a6ddb05778d..0000000000000000000000000000000000000000 --- a/arch/arm/mach-prima2/headsmp.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Entry of the second core for CSR Marco dual-core SMP SoCs - * - * Copyright (c) 2012 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#include -#include - -/* - * SIRFSOC specific entry point for secondary CPUs. This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(sirfsoc_secondary_startup) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup -ENDPROC(sirfsoc_secondary_startup) - - .align -1: .long . - .long prima2_pen_release diff --git a/arch/arm/mach-prima2/sleep.S b/arch/arm/mach-prima2/sleep.S deleted file mode 100644 index d9bbc5ca39ef84baf6cd979a51e15f7366172e0a..0000000000000000000000000000000000000000 --- a/arch/arm/mach-prima2/sleep.S +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * sleep mode for CSR SiRFprimaII - * - * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. - */ - -#include -#include -#include - -#include "pm.h" - -#define DENALI_CTL_22_OFF 0x58 -#define DENALI_CTL_112_OFF 0x1c0 - - .text - -ENTRY(sirfsoc_finish_suspend) - @ r5: mem controller - ldr r0, =sirfsoc_memc_base - ldr r5, [r0] - @ r6: pwrc base offset - ldr r0, =sirfsoc_pwrc_base - ldr r6, [r0] - @ r7: rtc iobrg controller - ldr r0, =sirfsoc_rtciobrg_base - ldr r7, [r0] - - @ Read the power control register and set the - @ sleep force bit. - add r0, r6, #SIRFSOC_PWRC_PDN_CTRL - bl __sirfsoc_rtc_iobrg_readl - orr r0,r0,#SIRFSOC_PWR_SLEEPFORCE - add r1, r6, #SIRFSOC_PWRC_PDN_CTRL - bl sirfsoc_rtc_iobrg_pre_writel - mov r1, #0x1 - - @ read the MEM ctl register and set the self - @ refresh bit - - ldr r2, [r5, #DENALI_CTL_22_OFF] - orr r2, r2, #0x1 - - @ Following code has to run from cache since - @ the RAM is going to self refresh mode - .align 5 - str r2, [r5, #DENALI_CTL_22_OFF] - -1: - ldr r4, [r5, #DENALI_CTL_112_OFF] - tst r4, #0x1 - bne 1b - - @ write SLEEPFORCE through rtc iobridge - - str r1, [r7] - @ wait rtc io bridge sync -1: - ldr r3, [r7] - tst r3, #0x01 - bne 1b - b . diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S deleted file mode 100644 index 4ad2fa27fc417ed7854cbc5c1516519cde843bb7..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/mioa701_bootresume.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Bootloader to resume MIO A701 - * - * 2007-1-12 Robert Jarzmik -*/ - -#include -#include - -/* - * Note: Yes, part of the following code is located into the .data section. - * This is to allow jumpaddr to be accessed with a relative load - * while we can't rely on any MMU translation. 
We could have put - * sleep_save_sp in the .text section as well, but some setups might - * insist on it to be truly read-only. - */ - .data - .align 2 -ENTRY(mioa701_bootstrap) -0: - b 1f -ENTRY(mioa701_jumpaddr) - .word 0x40f00008 @ PSPR in no-MMU mode -1: - mov r0, #0xa0000000 @ Don't suppose memory access works - orr r0, r0, #0x00200000 @ even if it's supposed to - orr r0, r0, #0x0000b000 - mov r1, #0 - str r1, [r0] @ Early disable resume for next boot - ldr r0, mioa701_jumpaddr @ (Murphy's Law) - ldr r0, [r0] - ret r0 -2: - -ENTRY(mioa701_bootstrap_lg) - .data - .align 2 - .word 2b-0b diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S deleted file mode 100644 index 6c5b3ffd2cd3f53900a4106696566dee2a2b9c5f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/sleep.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Low-level PXA250/210 sleep/wakeUp support - * - * Initial SA1110 code: - * Copyright (c) 2001 Cliff Brake - * - * Adapted for PXA by Nicolas Pitre: - * Copyright (c) 2002 Monta Vista Software, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License. - */ - -#include -#include -#include -#include -#include - -#define MDREFR_KDIV 0x200a4000 // all banks -#define CCCR_SLEEP 0x00000107 // L=7 2N=2 A=0 PPDIS=0 CPDIS=0 - - .text - -#ifdef CONFIG_PXA3xx -/* - * pxa3xx_finish_suspend() - forces CPU into sleep state (S2D3C4) - */ -ENTRY(pxa3xx_finish_suspend) - mov r0, #0x06 @ S2D3C4 mode - mcr p14, 0, r0, c7, c0, 0 @ enter sleep - -20: b 20b @ waiting for sleep -#endif /* CONFIG_PXA3xx */ - -#ifdef CONFIG_PXA27x -/* - * pxa27x_finish_suspend() - * - * Forces CPU into sleep state. - * - * r0 = value for PWRMODE M field for desired sleep state - */ -ENTRY(pxa27x_finish_suspend) - @ Put the processor to sleep - @ (also workaround for sighting 28071) - - @ prepare value for sleep mode - mov r1, r0 @ sleep mode - - @ prepare pointer to physical address 0 (virtual mapping in generic.c) - mov r2, #UNCACHED_PHYS_0 - - @ prepare SDRAM refresh settings - ldr r4, =MDREFR - ldr r5, [r4] - - @ enable SDRAM self-refresh mode - orr r5, r5, #MDREFR_SLFRSH - - @ set SDCLKx divide-by-2 bits (this is part of a workaround for Errata 50) - ldr r6, =MDREFR_KDIV - orr r5, r5, r6 - - @ Intel PXA270 Specification Update notes problems sleeping - @ with core operating above 91 MHz - @ (see Errata 50, ...processor does not exit from sleep...) - - ldr r6, =CCCR - ldr r8, [r6] @ keep original value for resume - - ldr r7, =CCCR_SLEEP @ prepare CCCR sleep value - mov r0, #0x2 @ prepare value for CLKCFG - - @ align execution to a cache line - b pxa_cpu_do_suspend -#endif - -#ifdef CONFIG_PXA25x -/* - * pxa25x_finish_suspend() - * - * Forces CPU into sleep state. - * - * r0 = value for PWRMODE M field for desired sleep state - */ - -ENTRY(pxa25x_finish_suspend) - @ prepare value for sleep mode - mov r1, r0 @ sleep mode - - @ prepare pointer to physical address 0 (virtual mapping in generic.c) - mov r2, #UNCACHED_PHYS_0 - - @ prepare SDRAM refresh settings - ldr r4, =MDREFR - ldr r5, [r4] - - @ enable SDRAM self-refresh mode - orr r5, r5, #MDREFR_SLFRSH - - @ Intel PXA255 Specification Update notes problems - @ about suspending with PXBus operating above 133MHz - @ (see Errata 31, GPIO output signals, ... 
unpredictable in sleep - @ - @ We keep the change-down close to the actual suspend on SDRAM - @ as possible to eliminate messing about with the refresh clock - @ as the system will restore with the original speed settings - @ - @ Ben Dooks, 13-Sep-2004 - - ldr r6, =CCCR - ldr r8, [r6] @ keep original value for resume - - @ ensure x1 for run and turbo mode with memory clock - bic r7, r8, #CCCR_M_MASK | CCCR_N_MASK - orr r7, r7, #(1<<5) | (2<<7) - - @ check that the memory frequency is within limits - and r14, r7, #CCCR_L_MASK - teq r14, #1 - bicne r7, r7, #CCCR_L_MASK - orrne r7, r7, #1 @@ 99.53MHz - - @ get ready for the change - - @ note, turbo is not preserved over sleep so there is no - @ point in preserving it here. we save it on the stack with the - @ other CP registers instead. - mov r0, #0 - mcr p14, 0, r0, c6, c0, 0 - orr r0, r0, #2 @ initiate change bit - b pxa_cpu_do_suspend -#endif - - .ltorg - .align 5 -pxa_cpu_do_suspend: - - @ All needed values are now in registers. - @ These last instructions should be in cache - - @ initiate the frequency change... - str r7, [r6] - mcr p14, 0, r0, c6, c0, 0 - - @ restore the original cpu speed value for resume - str r8, [r6] - - @ need 6 13-MHz cycles before changing PWRMODE - @ just set frequency to 91-MHz... 6*91/13 = 42 - - mov r0, #42 -10: subs r0, r0, #1 - bne 10b - - @ Do not reorder... - @ Intel PXA270 Specification Update notes problems performing - @ external accesses after SDRAM is put in self-refresh mode - @ (see Errata 38 ...hangs when entering self-refresh mode) - - @ force address lines low by reading at physical address 0 - ldr r3, [r2] - - @ put SDRAM into self-refresh - str r5, [r4] - - @ enter sleep mode - mcr p14, 0, r1, c7, c0, 0 @ PWRMODE - -20: b 20b @ loop waiting for sleep diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S deleted file mode 100644 index eab1645bb4adb93a37108dc30f6c335dc75f31e1..0000000000000000000000000000000000000000 --- a/arch/arm/mach-pxa/standby.S +++ /dev/null @@ -1,114 +0,0 @@ -/* - * PXA27x standby mode - * - * Author: David Burrage - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. - */ - -#include -#include -#include - -#include - - .text - -#ifdef CONFIG_PXA27x -ENTRY(pxa_cpu_standby) - ldr r0, =PSSR - mov r1, #(PSSR_PH | PSSR_STS) - mov r2, #PWRMODE_STANDBY - mov r3, #UNCACHED_PHYS_0 @ Read mem context in. - ldr ip, [r3] - b 1f - - .align 5 -1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby - str r1, [r0] @ make sure PSSR_PH/STS are clear - ret lr - -#endif - -#ifdef CONFIG_PXA3xx - -#define PXA3_MDCNFG 0x0000 -#define PXA3_MDCNFG_DMCEN (1 << 30) -#define PXA3_DDR_HCAL 0x0060 -#define PXA3_DDR_HCAL_HCRNG 0x1f -#define PXA3_DDR_HCAL_HCPROG (1 << 28) -#define PXA3_DDR_HCAL_HCEN (1 << 31) -#define PXA3_DMCIER 0x0070 -#define PXA3_DMCIER_EDLP (1 << 29) -#define PXA3_DMCISR 0x0078 -#define PXA3_RCOMP 0x0100 -#define PXA3_RCOMP_SWEVAL (1 << 31) - -ENTRY(pm_enter_standby_start) - mov r1, #0xf6000000 @ DMEMC_REG_BASE (PXA3_MDCNFG) - add r1, r1, #0x00100000 - - /* - * Preload the TLB entry for accessing the dynamic memory - * controller registers. Note that page table lookups will - * fail until the dynamic memory controller has been - * reinitialised - and that includes MMU page table walks. 
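- * The dummy read just below performs that preload: it pulls the
- * mapping into the TLB while translation still works.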
- * This also means that only the dynamic memory controller - * can be reliably accessed in the code following standby. - */ - ldr r2, [r1] @ Dummy read PXA3_MDCNFG - - mcr p14, 0, r0, c7, c0, 0 - .rept 8 - nop - .endr - - ldr r0, [r1, #PXA3_DDR_HCAL] @ Clear (and wait for) HCEN - bic r0, r0, #PXA3_DDR_HCAL_HCEN - str r0, [r1, #PXA3_DDR_HCAL] -1: ldr r0, [r1, #PXA3_DDR_HCAL] - tst r0, #PXA3_DDR_HCAL_HCEN - bne 1b - - ldr r0, [r1, #PXA3_RCOMP] @ Initiate RCOMP - orr r0, r0, #PXA3_RCOMP_SWEVAL - str r0, [r1, #PXA3_RCOMP] - - mov r0, #~0 @ Clear interrupts - str r0, [r1, #PXA3_DMCISR] - - ldr r0, [r1, #PXA3_DMCIER] @ set DMIER[EDLP] - orr r0, r0, #PXA3_DMCIER_EDLP - str r0, [r1, #PXA3_DMCIER] - - ldr r0, [r1, #PXA3_DDR_HCAL] @ clear HCRNG, set HCPROG, HCEN - bic r0, r0, #PXA3_DDR_HCAL_HCRNG - orr r0, r0, #PXA3_DDR_HCAL_HCEN | PXA3_DDR_HCAL_HCPROG - str r0, [r1, #PXA3_DDR_HCAL] - -1: ldr r0, [r1, #PXA3_DMCISR] - tst r0, #PXA3_DMCIER_EDLP - beq 1b - - ldr r0, [r1, #PXA3_MDCNFG] @ set PXA3_MDCNFG[DMCEN] - orr r0, r0, #PXA3_MDCNFG_DMCEN - str r0, [r1, #PXA3_MDCNFG] -1: ldr r0, [r1, #PXA3_MDCNFG] - tst r0, #PXA3_MDCNFG_DMCEN - beq 1b - - ldr r0, [r1, #PXA3_DDR_HCAL] @ set PXA3_DDR_HCAL[HCRNG] - orr r0, r0, #2 @ HCRNG - str r0, [r1, #PXA3_DDR_HCAL] - - ldr r0, [r1, #PXA3_DMCIER] @ Clear the interrupt - bic r0, r0, #0x20000000 - str r0, [r1, #PXA3_DMCIER] - - ret lr -ENTRY(pm_enter_standby_end) - -#endif diff --git a/arch/arm/mach-rockchip/headsmp.S b/arch/arm/mach-rockchip/headsmp.S deleted file mode 100644 index 37a7ea524a16077c958f2a9fdff640565500af4c..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rockchip/headsmp.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2013 MundoReader S.L. - * Author: Heiko Stuebner - */ -#include -#include - -ENTRY(rockchip_secondary_trampoline) - ldr pc, 1f -ENDPROC(rockchip_secondary_trampoline) - .globl rockchip_boot_fn -rockchip_boot_fn: -1: .space 4 - -ENTRY(rockchip_secondary_trampoline_end) diff --git a/arch/arm/mach-rockchip/sleep.S b/arch/arm/mach-rockchip/sleep.S deleted file mode 100644 index 3eca3922c944576c9c6aae93cea3a94b24055702..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rockchip/sleep.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd - * Author: Tony Xie - */ - -#include -#include -#include - -.data -/* - * this code will be copied from - * ddr to sram for system resumeing. - * so it is ".data section". 
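- * (Illustrative note, not from the original file: the platform PM
- * code copies this stub, together with the rkpm_bootdata_* words
- * below, into SRAM before suspend, on the order of
- *
- *	memcpy(sram_base, rockchip_slp_cpu_resume, rk3288_bootram_sz);
- *
- * with sram_base standing in for wherever the SRAM window is mapped.
- * The stub is therefore kept position independent and reads each
- * parameter with a pc-relative ldr.)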
- */ - .align 2 - -ENTRY(rockchip_slp_cpu_resume) - setmode PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1 @ set svc, irqs off - mrc p15, 0, r1, c0, c0, 5 - and r1, r1, #0xf - cmp r1, #0 - /* olny cpu0 can continue to run, the others is halt here */ - beq cpu0run -secondary_loop: - wfe - b secondary_loop -cpu0run: - ldr r3, rkpm_bootdata_l2ctlr_f - cmp r3, #0 - beq sp_set - ldr r3, rkpm_bootdata_l2ctlr - mcr p15, 1, r3, c9, c0, 2 -sp_set: - ldr sp, rkpm_bootdata_cpusp - ldr r1, rkpm_bootdata_cpu_code - bx r1 -ENDPROC(rockchip_slp_cpu_resume) - -/* Parameters filled in by the kernel */ - -/* Flag for whether to restore L2CTLR on resume */ - .global rkpm_bootdata_l2ctlr_f -rkpm_bootdata_l2ctlr_f: - .long 0 - -/* Saved L2CTLR to restore on resume */ - .global rkpm_bootdata_l2ctlr -rkpm_bootdata_l2ctlr: - .long 0 - -/* CPU resume SP addr */ - .globl rkpm_bootdata_cpusp -rkpm_bootdata_cpusp: - .long 0 - -/* CPU resume function (physical address) */ - .globl rkpm_bootdata_cpu_code -rkpm_bootdata_cpu_code: - .long 0 - -ENTRY(rk3288_bootram_sz) - .word . - rockchip_slp_cpu_resume diff --git a/arch/arm/mach-rpc/ecard-loader.S b/arch/arm/mach-rpc/ecard-loader.S deleted file mode 100644 index eb8ac0412da6186011b05e074dd3c1cb37e71de3..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/ecard-loader.S +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/ecard.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include - -#define CPSR2SPSR(rt) \ - mrs rt, cpsr; \ - msr spsr_cxsf, rt - -@ Purpose: call an expansion card loader to read bytes. -@ Proto : char read_loader(int offset, char *card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_read) - stmfd sp!, {r4 - r12, lr} - mov r11, r1 - mov r1, r0 - CPSR2SPSR(r0) - mov lr, pc - mov pc, r2 - ldmfd sp!, {r4 - r12, pc} - -@ Purpose: call an expansion card loader to reset the card -@ Proto : void read_loader(int card_base, char *loader); -@ Returns: byte read - -ENTRY(ecard_loader_reset) - stmfd sp!, {r4 - r12, lr} - mov r11, r0 - CPSR2SPSR(r0) - mov lr, pc - add pc, r1, #8 - ldmfd sp!, {r4 - r12, pc} - diff --git a/arch/arm/mach-rpc/fiq.S b/arch/arm/mach-rpc/fiq.S deleted file mode 100644 index 0de83e9b0b39398231b5b543a6e6d0762961cfd5..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/fiq.S +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include - - .text - - .global rpc_default_fiq_end -ENTRY(rpc_default_fiq_start) - mov r12, #ioc_base_high - .if ioc_base_low - orr r12, r12, #ioc_base_low - .endif - strb r12, [r12, #0x38] @ Disable FIQ register - subs pc, lr, #4 -rpc_default_fiq_end: diff --git a/arch/arm/mach-rpc/floppydma.S b/arch/arm/mach-rpc/floppydma.S deleted file mode 100644 index 6698b83050dc2c4f1a1b3b34ec67287f97e2c4c1..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/floppydma.S +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/floppydma.S - * - * Copyright (C) 1995, 1996 Russell King - */ -#include -#include - .text - - .global floppy_fiqin_end -ENTRY(floppy_fiqin_start) - subs r9, r9, #1 - ldrbgt r12, [r11, #-4] - ldrble r12, [r11], #0 - strb r12, [r10], #1 - subs pc, lr, #4 -floppy_fiqin_end: - - .global floppy_fiqout_end -ENTRY(floppy_fiqout_start) - subs r9, r9, #1 - ldrbge r12, [r10], #1 - movlt r12, #0 - strble r12, [r11], #0 - subsle pc, lr, #4 - strb r12, [r11, #-4] - 
subs pc, lr, #4 -floppy_fiqout_end: diff --git a/arch/arm/mach-rpc/include/mach/entry-macro.S b/arch/arm/mach-rpc/include/mach/entry-macro.S deleted file mode 100644 index a6d1a9f4bb791b946641ceb5dd1e72129eba3758..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/include/mach/entry-macro.S +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - - .equ ioc_base_high, IOC_BASE & 0xff000000 - .equ ioc_base_low, IOC_BASE & 0x00ff0000 - - .macro get_irqnr_preamble, base, tmp - mov \base, #ioc_base_high @ point at IOC - .if ioc_base_low - orr \base, \base, #ioc_base_low - .endif - .endm diff --git a/arch/arm/mach-rpc/io-acorn.S b/arch/arm/mach-rpc/io-acorn.S deleted file mode 100644 index b9082a2a2a01436784ff3d34161dbc83bfefa750..0000000000000000000000000000000000000000 --- a/arch/arm/mach-rpc/io-acorn.S +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/lib/io-acorn.S - * - * Copyright (C) 1995, 1996 Russell King - * - * 27/03/03 Ian Molton Clean up CONFIG_CPU - */ -#include -#include -#include - - .text - .align - -.Liosl_warning: - .ascii KERN_WARNING "insl/outsl not implemented, called from %08lX\0" - .align - -/* - * These make no sense on Acorn machines. - * Print a warning message. - */ -ENTRY(insl) -ENTRY(outsl) - adr r0, .Liosl_warning - mov r1, lr - b printk diff --git a/arch/arm/mach-s3c24xx/pm-h1940.S b/arch/arm/mach-s3c24xx/pm-h1940.S deleted file mode 100644 index a7bbe336ac6b6d28760434e05c6f9b57039dcd43..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/pm-h1940.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2006 Ben Dooks - * - * H1940 Suspend to RAM - */ - -#include -#include -#include -#include - -#include - - .text - .global h1940_pm_return - -h1940_pm_return: - mov r0, #S3C2410_PA_GPIO - ldr pc, [r0, #S3C2410_GSTATUS3 - S3C24XX_VA_GPIO] diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S deleted file mode 100644 index 659f9eff9de2d808ea6557d00a64eee53cec2415..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 Power Manager (Suspend-To-RAM) support - * - * Based on PXA/SA1100 sleep code by: - * Nicolas Pitre, (c) 2002 Monta Vista Software Inc - * Cliff Brake, (c) 2001 - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "regs-mem.h" - - /* s3c2410_cpu_suspend - * - * put the cpu into sleep mode - */ - -ENTRY(s3c2410_cpu_suspend) - @@ prepare cpu to sleep - - ldr r4, =S3C2410_REFRESH - ldr r5, =S3C24XX_MISCCR - ldr r6, =S3C2410_CLKCON - ldr r7, [r4] @ get REFRESH (and ensure in TLB) - ldr r8, [r5] @ get MISCCR (and ensure in TLB) - ldr r9, [r6] @ get CLKCON (and ensure in TLB) - - orr r7, r7, #S3C2410_REFRESH_SELF @ SDRAM sleep command - orr r8, r8, #S3C2410_MISCCR_SDSLEEP @ SDRAM power-down signals - orr r9, r9, #S3C2410_CLKCON_POWER @ power down command - - teq pc, #0 @ first as a trial-run to load cache - bl s3c2410_do_sleep - teq r0, r0 @ now do it for real - b s3c2410_do_sleep @ - - @@ align next bit of code to cache line - .align 5 -s3c2410_do_sleep: - streq r7, [r4] @ SDRAM sleep command - streq r8, [r5] @ SDRAM power-down config - streq r9, [r6] @ CPU sleep -1: beq 1b - ret lr diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S 
b/arch/arm/mach-s3c24xx/sleep-s3c2412.S deleted file mode 100644 index c373f1ca862bca608b72e4bbce8b7624603e8cd7..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2007 Simtec Electronics - * Ben Dooks - * - * S3C2412 Power Manager low-level sleep support - */ - -#include -#include -#include -#include - -#include - - .text - - .global s3c2412_sleep_enter - -s3c2412_sleep_enter: - mov r0, #0 /* argument for coprocessors */ - ldr r1, =S3C2410_INTPND - ldr r2, =S3C2410_SRCPND - ldr r3, =S3C2410_EINTPEND - - teq r0, r0 - bl s3c2412_sleep_enter1 - teq pc, r0 - bl s3c2412_sleep_enter1 - - .align 5 - - /* this is called twice, first with the Z flag to ensure that the - * instructions have been loaded into the cache, and the second - * time to try and suspend the system. - */ -s3c2412_sleep_enter1: - mcr p15, 0, r0, c7, c10, 4 - mcrne p15, 0, r0, c7, c0, 4 - - /* if we return from here, it is because an interrupt was - * active when we tried to shut down. Try and ack the IRQ and - * retry, as simply returning causes the system to lock. - */ - - ldrne r9, [r1] - strne r9, [r1] - ldrne r9, [r2] - strne r9, [r2] - ldrne r9, [r3] - strne r9, [r3] - bne s3c2412_sleep_enter1 - - ret lr diff --git a/arch/arm/mach-s3c24xx/sleep.S b/arch/arm/mach-s3c24xx/sleep.S deleted file mode 100644 index f0f11ad60c52c4bc262ebb7194a0ab46f597bf39..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c24xx/sleep.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 Power Manager (Suspend-To-RAM) support - * - * Based on PXA/SA1100 sleep code by: - * Nicolas Pitre, (c) 2002 Monta Vista Software Inc - * Cliff Brake, (c) 2001 - */ - -#include -#include -#include -#include -#include - -#include -#include - -/* - * S3C24XX_DEBUG_RESUME is dangerous if your bootloader does not - * reset the UART configuration; only enable if you really need this! - */ -//#define S3C24XX_DEBUG_RESUME - - .text - - /* sleep magic, to allow the bootloader to check for a valid - * image to resume to. Must be the first word before the - * s3c_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* s3c_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(s3c_cpu_resume) - mov r0, #PSR_I_BIT | PSR_F_BIT | SVC_MODE - msr cpsr_c, r0 - - @@ load UART to allow us to print the two characters for - @@ resume debug - - mov r2, #S3C24XX_PA_UART & 0xff000000 - orr r2, r2, #S3C24XX_PA_UART & 0xff000 - -#if 0 - /* SMDK2440 LED set */ - mov r14, #S3C24XX_PA_GPIO - ldr r12, [ r14, #0x54 ] - bic r12, r12, #3<<4 - orr r12, r12, #1<<7 - str r12, [ r14, #0x54 ] -#endif - -#ifdef S3C24XX_DEBUG_RESUME - mov r3, #'L' - strb r3, [ r2, #S3C2410_UTXH ] -1001: - ldrb r14, [ r3, #S3C2410_UTRSTAT ] - tst r14, #S3C2410_UTRSTAT_TXE - beq 1001b -#endif /* S3C24XX_DEBUG_RESUME */ - - b cpu_resume diff --git a/arch/arm/mach-s3c64xx/sleep.S b/arch/arm/mach-s3c64xx/sleep.S deleted file mode 100644 index 39e16a07a5e4b43daac12d8d386b650cb9f98d0b..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s3c64xx/sleep.S +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* linux/arch/arm/plat-s3c64xx/sleep.S - * - * Copyright 2008 Openmoko, Inc. 
- * Copyright 2008 Simtec Electronics - * Ben Dooks - * http://armlinux.simtec.co.uk/ - * - * S3C64XX CPU sleep code - */ - -#include -#include -#include - -#undef S3C64XX_VA_GPIO -#define S3C64XX_VA_GPIO (0x0) - -#include - -#define LL_UART (S3C_PA_UART + (0x400 * CONFIG_S3C_LOWLEVEL_UART_PORT)) - - .text - - /* Sleep magic, the word before the resume entry point so that the - * bootloader can check for a resumable image. */ - - .word 0x2bedf00d - - /* s3c_cpu_resume - * - * This is the entry point, stored by whatever method the bootloader - * requires to get the kernel running again. This code expects to be - * entered with no caches live and the MMU disabled. It will then - * restore the MMU and other basic CP registers saved and restart - * the kernel C code to finish the resume code. - */ - -ENTRY(s3c_cpu_resume) - msr cpsr_c, #PSR_I_BIT | PSR_F_BIT | SVC_MODE - ldr r2, =LL_UART /* for debug */ - -#ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK - -#define S3C64XX_GPNCON (S3C64XX_GPN_BASE + 0x00) -#define S3C64XX_GPNDAT (S3C64XX_GPN_BASE + 0x04) - -#define S3C64XX_GPN_CONMASK(__gpio) (0x3 << ((__gpio) * 2)) -#define S3C64XX_GPN_OUTPUT(__gpio) (0x1 << ((__gpio) * 2)) - - /* Initialise the GPIO state if we are debugging via the SMDK LEDs, - * as the uboot version supplied resets these to inputs during the - * resume checks. - */ - - ldr r3, =S3C64XX_PA_GPIO - ldr r0, [ r3, #S3C64XX_GPNCON ] - bic r0, r0, #(S3C64XX_GPN_CONMASK(12) | S3C64XX_GPN_CONMASK(13) | \ - S3C64XX_GPN_CONMASK(14) | S3C64XX_GPN_CONMASK(15)) - orr r0, r0, #(S3C64XX_GPN_OUTPUT(12) | S3C64XX_GPN_OUTPUT(13) | \ - S3C64XX_GPN_OUTPUT(14) | S3C64XX_GPN_OUTPUT(15)) - str r0, [ r3, #S3C64XX_GPNCON ] - - ldr r0, [ r3, #S3C64XX_GPNDAT ] - bic r0, r0, #0xf << 12 @ GPN12..15 - orr r0, r0, #1 << 15 @ GPN15 - str r0, [ r3, #S3C64XX_GPNDAT ] -#endif - b cpu_resume diff --git a/arch/arm/mach-s5pv210/sleep.S b/arch/arm/mach-s5pv210/sleep.S deleted file mode 100644 index 81568767f30a83bc3a40bccac0dfbe89eed5534f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-s5pv210/sleep.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * S5PV210 Sleep Code - * Based on S3C64XX sleep code by: - * Ben Dooks, (c) 2008 Simtec Electronics - */ - -#include - - .text - .align - - /* - * sleep magic, to allow the bootloader to check for a valid - * image to resume to. Must be the first word before the - * s3c_cpu_resume entry. - */ - - .word 0x2bedf00d - - /* - * s3c_cpu_resume - * - * resume code entry for bootloader to call - */ - -ENTRY(s5pv210_cpu_resume) - b cpu_resume -ENDPROC(s5pv210_cpu_resume) diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S deleted file mode 100644 index 85863741ef8bb6d5b6c0fa9199036557ac3aa734..0000000000000000000000000000000000000000 --- a/arch/arm/mach-sa1100/sleep.S +++ /dev/null @@ -1,143 +0,0 @@ -/* - * SA11x0 Assembler Sleep/WakeUp Management Routines - * - * Copyright (c) 2001 Cliff Brake - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License. - * - * History: - * - * 2001-02-06: Cliff Brake Initial code - * - * 2001-08-29: Nicolas Pitre Simplified. - * - * 2002-05-27: Nicolas Pitre Revisited, more cleanup and simplification. - * Storage is on the stack now. 
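The s3c24xx, s3c64xx and s5pv210 sleep images above all share one convention: the magic word 0x2bedf00d sits in the 32-bit slot immediately before the resume entry point, so a bootloader can decide whether RAM still holds a resumable image before jumping into it. A minimal C sketch of the bootloader-side test (the function name, and the idea of passing the entry's physical address, are illustrative assumptions, not code from any real bootloader):

#include <stdint.h>

#define SLEEP_MAGIC 0x2bedf00d  /* matches the .word placed before s3c_cpu_resume */

/* Hypothetical check: read the word just below the resume entry point. */
static int resume_image_valid(uintptr_t resume_entry_pa)
{
    const volatile uint32_t *magic =
        (const volatile uint32_t *)(resume_entry_pa - 4);
    return *magic == SLEEP_MAGIC;
}

A bootloader would call this with the physical address it stashed at suspend time and fall back to a cold boot when the check fails.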
- */ - -#include -#include -#include - - .text -/* - * sa1100_finish_suspend() - * - * Causes sa11x0 to enter sleep state - * - * Must be aligned to a cacheline. - */ - .balign 32 -ENTRY(sa1100_finish_suspend) - @ disable clock switching - mcr p15, 0, r1, c15, c2, 2 - - ldr r6, =MDREFR - ldr r4, [r6] - orr r4, r4, #MDREFR_K1DB2 - ldr r5, =PPCR - - @ Pre-load __loop_udelay into the I-cache - mov r0, #1 - bl __loop_udelay - mov r0, r0 - - @ The following must all exist in a single cache line to - @ avoid accessing memory until this sequence is complete, - @ otherwise we occasionally hang. - - @ Adjust memory timing before lowering CPU clock - str r4, [r6] - - @ delay 90us and set CPU PLL to lowest speed - @ fixes resume problem on high speed SA1110 - mov r0, #90 - bl __loop_udelay - mov r1, #0 - str r1, [r5] - mov r0, #90 - bl __loop_udelay - - /* - * SA1110 SDRAM controller workaround. register values: - * - * r0 = &MSC0 - * r1 = &MSC1 - * r2 = &MSC2 - * r3 = MSC0 value - * r4 = MSC1 value - * r5 = MSC2 value - * r6 = &MDREFR - * r7 = first MDREFR value - * r8 = second MDREFR value - * r9 = &MDCNFG - * r10 = MDCNFG value - * r11 = third MDREFR value - * r12 = &PMCR - * r13 = PMCR value (1) - */ - - ldr r0, =MSC0 - ldr r1, =MSC1 - ldr r2, =MSC2 - - ldr r3, [r0] - bic r3, r3, #FMsk(MSC_RT) - bic r3, r3, #FMsk(MSC_RT)<<16 - - ldr r4, [r1] - bic r4, r4, #FMsk(MSC_RT) - bic r4, r4, #FMsk(MSC_RT)<<16 - - ldr r5, [r2] - bic r5, r5, #FMsk(MSC_RT) - bic r5, r5, #FMsk(MSC_RT)<<16 - - ldr r7, [r6] - bic r7, r7, #0x0000FF00 - bic r7, r7, #0x000000F0 - orr r8, r7, #MDREFR_SLFRSH - - ldr r9, =MDCNFG - ldr r10, [r9] - bic r10, r10, #(MDCNFG_DE0+MDCNFG_DE1) - bic r10, r10, #(MDCNFG_DE2+MDCNFG_DE3) - - bic r11, r8, #MDREFR_SLFRSH - bic r11, r11, #MDREFR_E1PIN - - ldr r12, =PMCR - - mov r13, #PMCR_SF - - b sa1110_sdram_controller_fix - - .align 5 -sa1110_sdram_controller_fix: - - @ Step 1 clear RT field of all MSCx registers - str r3, [r0] - str r4, [r1] - str r5, [r2] - - @ Step 2 clear DRI field in MDREFR - str r7, [r6] - - @ Step 3 set SLFRSH bit in MDREFR - str r8, [r6] - - @ Step 4 clear DE bits in MDCNFG - str r10, [r9] - - @ Step 5 clear DRAM refresh control register - str r11, [r6] - - @ Wow, now the hardware suspend request pins can be used, that makes them functional for - @ about 7 ns out of the entire time that the CPU is running! - - @ Step 6 set force sleep bit in PMCR - - str r13, [r12] - -20: b 20b @ loop waiting for sleep diff --git a/arch/arm/mach-shmobile/headsmp-apmu.S b/arch/arm/mach-shmobile/headsmp-apmu.S deleted file mode 100644 index fabe9cadd12ef0b1d82ed9c3ead65f35943e8a70..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp-apmu.S +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * SMP support for APMU based systems with Cortex A7/A15 - * - * Copyright (C) 2014 Renesas Electronics Corporation - */ - -#include -#include - -ENTRY(shmobile_boot_apmu) - bl secure_cntvoff_init - b secondary_startup -ENDPROC(shmobile_boot_apmu) diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S deleted file mode 100644 index d0234296ae622b5f1745d9be7002d4705c3e4913..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp-scu.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * - * Shared SCU setup for mach-shmobile - * - * Copyright (C) 2012 Bastian Hecht - */ - -#include -#include -#include - -/* - * Boot code for secondary CPUs. 
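The SA1110 workaround above deliberately computes every value into registers first and only then branches to a cache-aligned block of plain stores, because after the first store lands SDRAM can no longer be trusted to service instruction fetches. The same compute-then-store shape in C (the masks and bit values below are placeholders standing in for the real MSC_RT/MDREFR/MDCNFG/PMCR definitions):

#include <stdint.h>

/* Placeholder masks -- stand-ins for the real SA11x0 register bits. */
#define MSC_RT_MASK     ((3u << 0) | (3u << 16))
#define MDREFR_DRI_MASK 0x0000fff0u
#define MDREFR_SLFRSH   (1u << 31)
#define MDREFR_E1PIN    (1u << 20)
#define MDCNFG_DE_MASK  0xfu
#define PMCR_SF         1u

static void sa1110_sleep_sequence(volatile uint32_t *msc[3],
                                  volatile uint32_t *mdrefr,
                                  volatile uint32_t *mdcnfg,
                                  volatile uint32_t *pmcr)
{
    uint32_t msc_v[3], refr_dri, refr_slfrsh, refr_final, cnfg_v;
    int i;

    /* Phase 1: all loads and bit manipulation while SDRAM is still usable. */
    for (i = 0; i < 3; i++)
        msc_v[i] = *msc[i] & ~MSC_RT_MASK;                   /* step 1 values */
    refr_dri    = *mdrefr & ~MDREFR_DRI_MASK;                /* step 2 value  */
    refr_slfrsh = refr_dri | MDREFR_SLFRSH;                  /* step 3 value  */
    cnfg_v      = *mdcnfg & ~MDCNFG_DE_MASK;                 /* step 4 value  */
    refr_final  = refr_slfrsh & ~(MDREFR_SLFRSH | MDREFR_E1PIN); /* step 5    */

    /* Phase 2: stores only, mirroring steps 1-6 of the assembly. */
    for (i = 0; i < 3; i++)
        *msc[i] = msc_v[i];
    *mdrefr = refr_dri;
    *mdrefr = refr_slfrsh;
    *mdcnfg = cnfg_v;
    *mdrefr = refr_final;
    *pmcr   = PMCR_SF;          /* step 6: force sleep */
    for (;;)
        ;                       /* 20: b 20b -- wait for sleep */
}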
- * - * First we turn on L1 cache coherency for our CPU. Then we jump to - * secondary_startup that invalidates the cache and hands over control - * to the common ARM startup code. - */ -ENTRY(shmobile_boot_scu) - @ r0 = SCU base address - mrc p15, 0, r1, c0, c0, 5 @ read MPIDR - and r1, r1, #3 @ mask out cpu ID - lsl r1, r1, #3 @ we will shift by cpu_id * 8 bits - ldr r2, [r0, #8] @ SCU Power Status Register - mov r3, #3 - lsl r3, r3, r1 - bic r2, r2, r3 @ Clear bits of our CPU (Run Mode) - str r2, [r0, #8] @ write back - - b secondary_startup -ENDPROC(shmobile_boot_scu) diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S deleted file mode 100644 index 9466ae61f56abd17726098143c4019789da1b201..0000000000000000000000000000000000000000 --- a/arch/arm/mach-shmobile/headsmp.S +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * SMP support for R-Mobile / SH-Mobile - * - * Copyright (C) 2010 Magnus Damm - * Copyright (C) 2010 Takashi Yoshii - * - * Based on vexpress, Copyright (c) 2003 ARM Limited, All Rights Reserved - */ -#include -#include -#include -#include -#include - -#define SCTLR_MMU 0x01 -#define BOOTROM_ADDRESS 0xE6340000 -#define RWTCSRA_ADDRESS 0xE6020004 -#define RWTCSRA_WOVF 0x10 - -/* - * Reset vector for secondary CPUs. - * This will be mapped at address 0 by SBAR register. - * We need _long_ jump to the physical address. - */ - .arm - .align 12 -ENTRY(shmobile_boot_vector) - ldr r1, 1f - bx r1 - -ENDPROC(shmobile_boot_vector) - - .align 2 - .globl shmobile_boot_fn -shmobile_boot_fn: -1: .space 4 - .globl shmobile_boot_size -shmobile_boot_size: - .long . - shmobile_boot_vector - -#ifdef CONFIG_ARCH_RCAR_GEN2 -/* - * Reset vector for R-Car Gen2 and RZ/G1 secondary CPUs. - * This will be mapped at address 0 by SBAR register. - */ -ENTRY(shmobile_boot_vector_gen2) - mrc p15, 0, r0, c0, c0, 5 @ r0 = MPIDR - ldr r1, shmobile_boot_cpu_gen2 - cmp r0, r1 - bne shmobile_smp_continue_gen2 - - mrc p15, 0, r1, c1, c0, 0 @ r1 = SCTLR - and r0, r1, #SCTLR_MMU - cmp r0, #SCTLR_MMU - beq shmobile_smp_continue_gen2 - - ldr r0, rwtcsra - mov r1, #0 - ldrb r1, [r0] - and r0, r1, #RWTCSRA_WOVF - cmp r0, #RWTCSRA_WOVF - bne shmobile_smp_continue_gen2 - - ldr r0, bootrom - bx r0 - -shmobile_smp_continue_gen2: - ldr r1, shmobile_boot_fn_gen2 - bx r1 - -ENDPROC(shmobile_boot_vector_gen2) - - .align 4 -rwtcsra: - .word RWTCSRA_ADDRESS -bootrom: - .word BOOTROM_ADDRESS - .globl shmobile_boot_cpu_gen2 -shmobile_boot_cpu_gen2: - .word 0x00000000 - - .align 2 - .globl shmobile_boot_fn_gen2 -shmobile_boot_fn_gen2: - .space 4 - .globl shmobile_boot_size_gen2 -shmobile_boot_size_gen2: - .long . 
- shmobile_boot_vector_gen2 -#endif /* CONFIG_ARCH_RCAR_GEN2 */ - -/* - * Per-CPU SMP boot function/argument selection code based on MPIDR - */ - -ENTRY(shmobile_smp_boot) - mrc p15, 0, r1, c0, c0, 5 @ r1 = MPIDR - and r0, r1, #0xffffff @ MPIDR_HWID_BITMASK - @ r0 = cpu_logical_map() value - mov r1, #0 @ r1 = CPU index - adr r2, 1f - ldmia r2, {r5, r6, r7} - add r5, r5, r2 @ array of per-cpu mpidr values - add r6, r6, r2 @ array of per-cpu functions - add r7, r7, r2 @ array of per-cpu arguments - -shmobile_smp_boot_find_mpidr: - ldr r8, [r5, r1, lsl #2] - cmp r8, r0 - bne shmobile_smp_boot_next - - ldr r9, [r6, r1, lsl #2] - cmp r9, #0 - bne shmobile_smp_boot_found - -shmobile_smp_boot_next: - add r1, r1, #1 - cmp r1, #NR_CPUS - blo shmobile_smp_boot_find_mpidr - - b shmobile_smp_sleep - -shmobile_smp_boot_found: - ldr r0, [r7, r1, lsl #2] - ret r9 -ENDPROC(shmobile_smp_boot) - -ENTRY(shmobile_smp_sleep) - wfi - b shmobile_smp_boot -ENDPROC(shmobile_smp_sleep) - - .align 2 -1: .long shmobile_smp_mpidr - . - .long shmobile_smp_fn - 1b - .long shmobile_smp_arg - 1b - - .bss - .globl shmobile_smp_mpidr -shmobile_smp_mpidr: - .space NR_CPUS * 4 - .globl shmobile_smp_fn -shmobile_smp_fn: - .space NR_CPUS * 4 - .globl shmobile_smp_arg -shmobile_smp_arg: - .space NR_CPUS * 4 diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S deleted file mode 100644 index 54f1844eac031bd165484daec502040fee2a992a..0000000000000000000000000000000000000000 --- a/arch/arm/mach-socfpga/headsmp.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2003 ARM Limited - * Copyright (c) u-boot contributors - * Copyright (c) 2012 Pavel Machek - */ -#include -#include -#include -#include - - .arch armv7-a - .arm - -ENTRY(secondary_trampoline) - /* CPU1 will always fetch from 0x0 when it is brought out of reset. - * Thus, we can just subtract the PAGE_OFFSET to get the physical - * address of &cpu1start_addr. This would not work for platforms - * where the physical memory does not start at 0x0. - */ -ARM_BE8(setend be) - adr r0, 1f - ldmia r0, {r1, r2} - sub r2, r2, #PAGE_OFFSET - ldr r3, [r2] - ldr r4, [r3] -ARM_BE8(rev r4, r4) - bx r4 - - .align -1: .long . - .long socfpga_cpu1start_addr -ENTRY(secondary_trampoline_end) diff --git a/arch/arm/mach-socfpga/self-refresh.S b/arch/arm/mach-socfpga/self-refresh.S deleted file mode 100644 index 649f2779053d11a26c966e645a920b1995fc1e85..0000000000000000000000000000000000000000 --- a/arch/arm/mach-socfpga/self-refresh.S +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-2015 Altera Corporation. All rights reserved. 
- */ -#include -#include - -#define MAX_LOOP_COUNT 1000 - -/* Register offset */ -#define SDR_CTRLGRP_LOWPWREQ_ADDR 0x54 -#define SDR_CTRLGRP_LOWPWRACK_ADDR 0x58 - -/* Bitfield positions */ -#define SELFRSHREQ_POS 3 -#define SELFRSHREQ_MASK 0x8 - -#define SELFRFSHACK_POS 1 -#define SELFRFSHACK_MASK 0x2 - - /* - * This code assumes that when the bootloader configured - * the sdram controller for the DDR on the board it - * configured the following fields depending on the DDR - * vendor/configuration: - * - * sdr.ctrlcfg.lowpwreq.selfrfshmask - * sdr.ctrlcfg.lowpwrtiming.clkdisablecycles - * sdr.ctrlcfg.dramtiming4.selfrfshexit - */ - - .arch armv7-a - .text - .align 3 - - /* - * socfpga_sdram_self_refresh - * - * r0 : sdr_ctl_base_addr - * r1 : temp storage of return value - * r2 : temp storage of register values - * r3 : loop counter - * - * return value: lower 16 bits: loop count going into self refresh - * upper 16 bits: loop count exiting self refresh - */ -ENTRY(socfpga_sdram_self_refresh) - /* Enable dynamic clock gating in the Power Control Register. */ - mrc p15, 0, r2, c15, c0, 0 - orr r2, r2, #1 - mcr p15, 0, r2, c15, c0, 0 - - /* Enable self refresh: set sdr.ctrlgrp.lowpwreq.selfrshreq = 1 */ - ldr r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - orr r2, r2, #SELFRSHREQ_MASK - str r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - - /* Poll until sdr.ctrlgrp.lowpwrack.selfrfshack == 1 or hit max loops */ - mov r3, #0 -while_ack_0: - ldr r2, [r0, #SDR_CTRLGRP_LOWPWRACK_ADDR] - and r2, r2, #SELFRFSHACK_MASK - cmp r2, #SELFRFSHACK_MASK - beq ack_1 - - add r3, #1 - cmp r3, #MAX_LOOP_COUNT - bne while_ack_0 - -ack_1: - mov r1, r3 - - /* - * Execute an ISB instruction to ensure that all of the - * CP15 register changes have been committed. - */ - isb - - /* - * Execute a barrier instruction to ensure that all cache, - * TLB and branch predictor maintenance operations issued - * by any CPU in the cluster have completed. - */ - dsb - dmb - - wfi - - /* Disable self-refresh: set sdr.ctrlgrp.lowpwreq.selfrshreq = 0 */ - ldr r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - bic r2, r2, #SELFRSHREQ_MASK - str r2, [r0, #SDR_CTRLGRP_LOWPWREQ_ADDR] - - /* Poll until sdr.ctrlgrp.lowpwrack.selfrfshack == 0 or hit max loops */ - mov r3, #0 -while_ack_1: - ldr r2, [r0, #SDR_CTRLGRP_LOWPWRACK_ADDR] - and r2, r2, #SELFRFSHACK_MASK - cmp r2, #SELFRFSHACK_MASK - bne ack_0 - - add r3, #1 - cmp r3, #MAX_LOOP_COUNT - bne while_ack_1 - -ack_0: - /* - * Prepare return value: - * Shift loop count for exiting self refresh into upper 16 bits. - * Leave loop count for requesting self refresh in lower 16 bits. - */ - mov r3, r3, lsl #16 - add r1, r1, r3 - - /* Disable dynamic clock gating in the Power Control Register. */ - mrc p15, 0, r2, c15, c0, 0 - bic r2, r2, #1 - mcr p15, 0, r2, c15, c0, 0 - - mov r0, r1 @ return value - bx lr @ return - -ENDPROC(socfpga_sdram_self_refresh) -ENTRY(socfpga_sdram_self_refresh_sz) - .word . - socfpga_sdram_self_refresh diff --git a/arch/arm/mach-spear/headsmp.S b/arch/arm/mach-spear/headsmp.S deleted file mode 100644 index 96f89436ccf61db20a336b1dc433a46765070636..0000000000000000000000000000000000000000 --- a/arch/arm/mach-spear/headsmp.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mach-spear13XX/headsmp.S - * - * Picked from realview - * Copyright (c) 2012 ST Microelectronics Limited - * Shiraz Hashim - */ - -#include -#include - - __INIT - -/* - * spear13xx specific entry point for secondary CPUs. 
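socfpga_sdram_self_refresh above packs its two bounded polling loops into one return value: the loop count taken entering self-refresh in the low 16 bits, the count taken exiting in the high 16. Rendered as C, with the register offsets from the #defines above (the helper and function names are illustrative):

#include <stdint.h>

#define SDR_CTRLGRP_LOWPWREQ_ADDR  0x54
#define SDR_CTRLGRP_LOWPWRACK_ADDR 0x58
#define SELFRSHREQ_MASK  0x8
#define SELFRFSHACK_MASK 0x2
#define MAX_LOOP_COUNT   1000

static inline volatile uint32_t *sdr_reg(uintptr_t base, uint32_t off)
{
    return (volatile uint32_t *)(base + off);
}

/* Returns (exit_loops << 16) | enter_loops, like the assembly routine. */
static uint32_t sdram_self_refresh(uintptr_t sdr_base)
{
    volatile uint32_t *req = sdr_reg(sdr_base, SDR_CTRLGRP_LOWPWREQ_ADDR);
    volatile uint32_t *ack = sdr_reg(sdr_base, SDR_CTRLGRP_LOWPWRACK_ADDR);
    uint32_t enter_loops = 0, exit_loops = 0;

    *req |= SELFRSHREQ_MASK;                    /* request self-refresh */
    while (!(*ack & SELFRFSHACK_MASK) &&
           ++enter_loops < MAX_LOOP_COUNT)
        ;                                       /* wait for the ack */

    /* ... the real routine issues isb/dsb/dmb and wfi here ... */

    *req &= ~SELFRSHREQ_MASK;                   /* leave self-refresh */
    while ((*ack & SELFRFSHACK_MASK) &&
           ++exit_loops < MAX_LOOP_COUNT)
        ;                                       /* wait for the ack to drop */

    return (exit_loops << 16) | enter_loops;
}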
This provides - * a "holding pen" into which all secondary cores are held until we're - * ready for them to initialise. - */ -ENTRY(spear13xx_secondary_startup) - mrc p15, 0, r0, c0, c0, 5 - and r0, r0, #15 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* re-enable coherency */ - mrc p15, 0, r0, c1, c0, 1 - orr r0, r0, #(1 << 6) | (1 << 0) - mcr p15, 0, r0, c1, c0, 1 - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup - - .align -1: .long . - .long spear_pen_release -ENDPROC(spear13xx_secondary_startup) diff --git a/arch/arm/mach-sunxi/headsmp.S b/arch/arm/mach-sunxi/headsmp.S deleted file mode 100644 index 32d76be98541acc857c6c038b98c789426e89869..0000000000000000000000000000000000000000 --- a/arch/arm/mach-sunxi/headsmp.S +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (c) 2018 Chen-Yu Tsai - * Copyright (c) 2018 Bootlin - * - * Chen-Yu Tsai - * Mylène Josserand - * - * SMP support for sunxi based systems with Cortex A7/A15 - * - */ - -#include -#include -#include - -ENTRY(sunxi_mc_smp_cluster_cache_enable) - .arch armv7-a - /* - * Enable cluster-level coherency, in preparation for turning on the MMU. - * - * Also enable regional clock gating and L2 data latency settings for - * Cortex-A15. These settings are from the vendor kernel. - */ - mrc p15, 0, r1, c0, c0, 0 - movw r2, #(ARM_CPU_PART_MASK & 0xffff) - movt r2, #(ARM_CPU_PART_MASK >> 16) - and r1, r1, r2 - movw r2, #(ARM_CPU_PART_CORTEX_A15 & 0xffff) - movt r2, #(ARM_CPU_PART_CORTEX_A15 >> 16) - cmp r1, r2 - bne not_a15 - - /* The following is Cortex-A15 specific */ - - /* ACTLR2: Enable CPU regional clock gates */ - mrc p15, 1, r1, c15, c0, 4 - orr r1, r1, #(0x1 << 31) - mcr p15, 1, r1, c15, c0, 4 - - /* L2ACTLR */ - mrc p15, 1, r1, c15, c0, 0 - /* Enable L2, GIC, and Timer regional clock gates */ - orr r1, r1, #(0x1 << 26) - /* Disable clean/evict from being pushed to external */ - orr r1, r1, #(0x1<<3) - mcr p15, 1, r1, c15, c0, 0 - - /* L2CTRL: L2 data RAM latency */ - mrc p15, 1, r1, c9, c0, 2 - bic r1, r1, #(0x7 << 0) - orr r1, r1, #(0x3 << 0) - mcr p15, 1, r1, c9, c0, 2 - - /* End of Cortex-A15 specific setup */ - not_a15: - - /* Get value of sunxi_mc_smp_first_comer */ - adr r1, first - ldr r0, [r1] - ldr r0, [r1, r0] - - /* Skip cci_enable_port_for_self if not first comer */ - cmp r0, #0 - bxeq lr - b cci_enable_port_for_self - - .align 2 - first: .word sunxi_mc_smp_first_comer - . 
-ENDPROC(sunxi_mc_smp_cluster_cache_enable) - -ENTRY(sunxi_mc_smp_secondary_startup) - bl sunxi_mc_smp_cluster_cache_enable - bl secure_cntvoff_init - b secondary_startup -ENDPROC(sunxi_mc_smp_secondary_startup) - -ENTRY(sunxi_mc_smp_resume) - bl sunxi_mc_smp_cluster_cache_enable - b cpu_resume -ENDPROC(sunxi_mc_smp_resume) diff --git a/arch/arm/mach-tango/smc.S b/arch/arm/mach-tango/smc.S deleted file mode 100644 index b1752aaa72bcbf0267a81e80728aa0999bfd0a87..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tango/smc.S +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include - - .arch armv7-a - .arch_extension sec -ENTRY(tango_smc) - push {lr} - mov ip, r1 - dsb /* This barrier is probably unnecessary */ - smc #0 - pop {pc} -ENDPROC(tango_smc) diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S deleted file mode 100644 index e3f34815c9da7dc844de4d00d168e2b8cfebbae2..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/reset-handler.S +++ /dev/null @@ -1,307 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2012, NVIDIA Corporation. All rights reserved. - */ - -#include -#include - -#include -#include - -#include -#include -#include - -#include "iomap.h" -#include "reset.h" -#include "sleep.h" - -#define PMC_SCRATCH41 0x140 - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_resume - * - * CPU boot vector when restarting a CPU following - * an LP2 transition. Also branched to by LP0 and LP1 resume after - * re-enabling sdram. - * - * r6: SoC ID - * r8: CPU part number - */ -ENTRY(tegra_resume) - check_cpu_part_num 0xc09, r8, r9 - bleq v7_invalidate_l1 - - cpu_id r0 - cmp r0, #0 @ CPU0? - THUMB( it ne ) - bne cpu_resume @ no - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r6 - /* Are we on Tegra20? */ - cmp r6, #TEGRA20 - beq 1f @ Yes - /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r3, r0 - mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r3] - /* Clear event & intr flag */ - orr r1, r1, \ - #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps - @ & ext flags for CPU power mgnt - bic r1, r1, r0 - str r1, [r2, r3] -1: - - mov32 r9, 0xc09 - cmp r8, r9 - bne end_ca9_scu_l2_resume -#ifdef CONFIG_HAVE_ARM_SCU - /* enable SCU */ - mov32 r0, TEGRA_ARM_PERIF_BASE - ldr r1, [r0] - orr r1, r1, #1 - str r1, [r0] -#endif - bl tegra_resume_trusted_foundations - -#ifdef CONFIG_CACHE_L2X0 - /* L2 cache resume & re-enable */ - bl l2c310_early_resume -#endif -end_ca9_scu_l2_resume: - mov32 r9, 0xc0f - cmp r8, r9 - bleq tegra_init_l2_for_a15 - - b cpu_resume -ENDPROC(tegra_resume) - -/* - * tegra_resume_trusted_foundations - * - * Trusted Foundations firmware initialization. - * - * Doesn't return if firmware is present. - * Corrupted registers: r1, r2 - */ -ENTRY(tegra_resume_trusted_foundations) - /* Check whether Trusted Foundations firmware is present. */ - mov32 r2, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET - ldr r1, =__tegra_cpu_reset_handler_data_offset + \ - RESET_DATA(TF_PRESENT) - ldr r1, [r2, r1] - cmp r1, #0 - reteq lr - - .arch_extension sec - /* First call after suspend wakes firmware. No arguments required. */ - smc #0 - - b cpu_resume -ENDPROC(tegra_resume_trusted_foundations) -#endif - - .align L1_CACHE_SHIFT -ENTRY(__tegra_cpu_reset_handler_start) - -/* - * __tegra_cpu_reset_handler: - * - * Common handler for all CPU reset events. 
- * - * Register usage within the reset handler: - * - * Others: scratch - * R6 = SoC ID - * R7 = CPU present (to the OS) mask - * R8 = CPU in LP1 state mask - * R9 = CPU in LP2 state mask - * R10 = CPU number - * R11 = CPU mask - * R12 = pointer to reset handler data - * - * NOTE: This code is copied to IRAM. All code and data accesses - * must be position-independent. - */ - - .arm - .align L1_CACHE_SHIFT -ENTRY(__tegra_cpu_reset_handler) - - cpsid aif, 0x13 @ SVC mode, interrupts disabled - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r6 - - adr r12, __tegra_cpu_reset_handler_data - ldr r5, [r12, #RESET_DATA(TF_PRESENT)] - cmp r5, #0 - bne after_errata - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC -t20_check: - cmp r6, #TEGRA20 - bne after_t20_check -t20_errata: - # Tegra20 is a Cortex-A9 r1p1 - mrc p15, 0, r0, c1, c0, 0 @ read system control register - orr r0, r0, #1 << 14 @ erratum 716044 - mcr p15, 0, r0, c1, c0, 0 @ write system control register - mrc p15, 0, r0, c15, c0, 1 @ read diagnostic register - orr r0, r0, #1 << 4 @ erratum 742230 - orr r0, r0, #1 << 11 @ erratum 751472 - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - b after_errata -after_t20_check: -#endif -#ifdef CONFIG_ARCH_TEGRA_3x_SOC -t30_check: - cmp r6, #TEGRA30 - bne after_t30_check -t30_errata: - # Tegra30 is a Cortex-A9 r2p9 - mrc p15, 0, r0, c15, c0, 1 @ read diagnostic register - orr r0, r0, #1 << 6 @ erratum 743622 - orr r0, r0, #1 << 11 @ erratum 751472 - mcr p15, 0, r0, c15, c0, 1 @ write diagnostic register - b after_errata -after_t30_check: -#endif -after_errata: - mrc p15, 0, r10, c0, c0, 5 @ MPIDR - and r10, r10, #0x3 @ R10 = CPU number - mov r11, #1 - mov r11, r11, lsl r10 @ R11 = CPU mask - -#ifdef CONFIG_SMP - /* Does the OS know about this CPU? */ - ldr r7, [r12, #RESET_DATA(MASK_PRESENT)] - tst r7, r11 @ if !present - bleq __die @ CPU not present (to OS) -#endif - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC - /* Are we on Tegra20? */ - cmp r6, #TEGRA20 - bne 1f - /* If not CPU0, don't let CPU0 reset CPU1 now that CPU1 is coming up. */ - mov r0, #CPU_NOT_RESETTABLE - cmp r10, #0 - strbne r0, [r12, #RESET_DATA(RESETTABLE_STATUS)] -1: -#endif - - /* Waking up from LP1? */ - ldr r8, [r12, #RESET_DATA(MASK_LP1)] - tst r8, r11 @ if in_lp1 - beq __is_not_lp1 - cmp r10, #0 - bne __die @ only CPU0 can be here - ldr lr, [r12, #RESET_DATA(STARTUP_LP1)] - cmp lr, #0 - bleq __die @ no LP1 startup handler - THUMB( add lr, lr, #1 ) @ switch to Thumb mode - bx lr -__is_not_lp1: - - /* Waking up from LP2? */ - ldr r9, [r12, #RESET_DATA(MASK_LP2)] - tst r9, r11 @ if in_lp2 - beq __is_not_lp2 - ldr lr, [r12, #RESET_DATA(STARTUP_LP2)] - cmp lr, #0 - bleq __die @ no LP2 startup handler - bx lr - -__is_not_lp2: - -#ifdef CONFIG_SMP - /* - * Can only be secondary boot (initial or hotplug) - * CPU0 can't be here for Tegra20/30 - */ - cmp r6, #TEGRA114 - beq __no_cpu0_chk - cmp r10, #0 - bleq __die @ CPU0 cannot be here -__no_cpu0_chk: - ldr lr, [r12, #RESET_DATA(STARTUP_SECONDARY)] - cmp lr, #0 - bleq __die @ no secondary startup handler - bx lr -#endif - -/* - * We don't know why the CPU reset. Just kill it. - * The LR register will contain the address we died at + 4. - */ - -__die: - sub lr, lr, #4 - mov32 r7, TEGRA_PMC_BASE - str lr, [r7, #PMC_SCRATCH41] - - mov32 r7, TEGRA_CLK_RESET_BASE - - /* Are we on Tegra20? 
*/ - cmp r6, #TEGRA20 - bne 1f - -#ifdef CONFIG_ARCH_TEGRA_2x_SOC - mov32 r0, 0x1111 - mov r1, r0, lsl r10 - str r1, [r7, #0x340] @ CLK_RST_CPU_CMPLX_SET -#endif -1: -#ifdef CONFIG_ARCH_TEGRA_3x_SOC - mov32 r6, TEGRA_FLOW_CTRL_BASE - - cmp r10, #0 - moveq r1, #FLOW_CTRL_HALT_CPU0_EVENTS - moveq r2, #FLOW_CTRL_CPU0_CSR - movne r1, r10, lsl #3 - addne r2, r1, #(FLOW_CTRL_CPU1_CSR-8) - addne r1, r1, #(FLOW_CTRL_HALT_CPU1_EVENTS-8) - - /* Clear CPU "event" and "interrupt" flags and power gate - it when halting but not before it is in the "WFI" state. */ - ldr r0, [r6, +r2] - orr r0, r0, #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - orr r0, r0, #FLOW_CTRL_CSR_ENABLE - str r0, [r6, +r2] - - /* Unconditionally halt this CPU */ - mov r0, #FLOW_CTRL_WAITEVENT - str r0, [r6, +r1] - ldr r0, [r6, +r1] @ memory barrier - - dsb - isb - wfi @ CPU should be power gated here - - /* If the CPU didn't power gate above just kill its clock. */ - - mov r0, r11, lsl #8 - str r0, [r7, #348] @ CLK_CPU_CMPLX_SET -#endif - - /* If the CPU still isn't dead, just spin here. */ - b . -ENDPROC(__tegra_cpu_reset_handler) - - .align L1_CACHE_SHIFT - .type __tegra_cpu_reset_handler_data, %object - .globl __tegra_cpu_reset_handler_data - .globl __tegra_cpu_reset_handler_data_offset - .equ __tegra_cpu_reset_handler_data_offset, \ - . - __tegra_cpu_reset_handler_start -__tegra_cpu_reset_handler_data: - .rept TEGRA_RESET_DATA_SIZE - .long 0 - .endr - .align L1_CACHE_SHIFT - -ENTRY(__tegra_cpu_reset_handler_end) diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S deleted file mode 100644 index 9a89f30d53ca172d31e9ded2878fbba21b1c2575..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ /dev/null @@ -1,575 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved. - * Copyright (c) 2011, Google, Inc. 
- * - * Author: Colin Cross - * Gary King - */ - -#include - -#include - -#include -#include -#include -#include - -#include "irammap.h" -#include "reset.h" -#include "sleep.h" - -#define EMC_CFG 0xc -#define EMC_ADR_CFG 0x10 -#define EMC_NOP 0xdc -#define EMC_SELF_REF 0xe0 -#define EMC_REQ_CTRL 0x2b0 -#define EMC_EMC_STATUS 0x2b4 - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 -#define CLK_RESET_SCLK_BURST 0x28 -#define CLK_RESET_SCLK_DIVIDER 0x2c -#define CLK_RESET_PLLC_BASE 0x80 -#define CLK_RESET_PLLM_BASE 0x90 -#define CLK_RESET_PLLP_BASE 0xa0 - -#define APB_MISC_XM2CFGCPADCTRL 0x8c8 -#define APB_MISC_XM2CFGDPADCTRL 0x8cc -#define APB_MISC_XM2CLKCFGPADCTRL 0x8d0 -#define APB_MISC_XM2COMPPADCTRL 0x8d4 -#define APB_MISC_XM2VTTGENPADCTRL 0x8d8 -#define APB_MISC_XM2CFGCPADCTRL2 0x8e4 -#define APB_MISC_XM2CFGDPADCTRL2 0x8e8 - -#define __tegra20_cpu1_resettable_status_offset \ - (__tegra_cpu_reset_handler_data_offset + RESET_DATA(RESETTABLE_STATUS)) - -.macro pll_enable, rd, r_car_base, pll_base - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 30) - orreq \rd, \rd, #(1 << 30) - streq \rd, [\r_car_base, #\pll_base] -.endm - -.macro emc_device_mask, rd, base - ldr \rd, [\base, #EMC_ADR_CFG] - tst \rd, #(0x3 << 24) - moveq \rd, #(0x1 << 8) @ just 1 device - movne \rd, #(0x3 << 8) @ 2 devices -.endm - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra20_hotplug_shutdown(void) - * - * puts the current cpu in reset - * should never return - */ -ENTRY(tegra20_hotplug_shutdown) - /* Put this CPU down */ - cpu_id r0 - bl tegra20_cpu_shutdown - ret lr @ should never get here -ENDPROC(tegra20_hotplug_shutdown) - -/* - * tegra20_cpu_shutdown(int cpu) - * - * r0 is cpu to reset - * - * puts the specified CPU in wait-for-event mode on the flow controller - * and puts the CPU in reset - * can be called on the current cpu or another cpu - * if called on the current cpu, does not return - * MUST NOT BE CALLED FOR CPU 0. - * - * corrupts r0-r3, r12 - */ -ENTRY(tegra20_cpu_shutdown) - cmp r0, #0 - reteq lr @ must not be called for CPU 0 - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_RESETTABLE - strb r12, [r1, r2] - - cpu_to_halt_reg r1, r0 - ldr r3, =TEGRA_FLOW_CTRL_VIRT - mov r2, #FLOW_CTRL_WAITEVENT | FLOW_CTRL_JTAG_RESUME - str r2, [r3, r1] @ put flow controller in wait event mode - ldr r2, [r3, r1] - isb - dsb - movw r1, 0x1011 - mov r1, r1, lsl r0 - ldr r3, =TEGRA_CLK_RESET_VIRT - str r1, [r3, #0x340] @ put slave CPU in reset - isb - dsb - cpu_id r3 - cmp r3, r0 - beq . 
- ret lr -ENDPROC(tegra20_cpu_shutdown) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_pen_lock - * - * spinlock implementation with no atomic test-and-set and no coherence - * using Peterson's algorithm on strongly-ordered registers - * used to synchronize a cpu waking up from wfi with entering lp2 on idle - * - * The reference link of Peterson's algorithm: - * http://en.wikipedia.org/wiki/Peterson's_algorithm - * - * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm) - * on cpu 0: - * r2 = flag[0] (in SCRATCH38) - * r3 = flag[1] (in SCRATCH39) - * on cpu1: - * r2 = flag[1] (in SCRATCH39) - * r3 = flag[0] (in SCRATCH38) - * - * must be called with MMU on - * corrupts r0-r3, r12 - */ -ENTRY(tegra_pen_lock) - mov32 r3, TEGRA_PMC_VIRT - cpu_id r0 - add r1, r3, #PMC_SCRATCH37 - cmp r0, #0 - addeq r2, r3, #PMC_SCRATCH38 - addeq r3, r3, #PMC_SCRATCH39 - addne r2, r3, #PMC_SCRATCH39 - addne r3, r3, #PMC_SCRATCH38 - - mov r12, #1 - str r12, [r2] @ flag[cpu] = 1 - dsb - str r12, [r1] @ !turn = cpu -1: dsb - ldr r12, [r3] - cmp r12, #1 @ flag[!cpu] == 1? - ldreq r12, [r1] - cmpeq r12, r0 @ !turn == cpu? - beq 1b @ while !turn == cpu && flag[!cpu] == 1 - - ret lr @ locked -ENDPROC(tegra_pen_lock) - -ENTRY(tegra_pen_unlock) - dsb - mov32 r3, TEGRA_PMC_VIRT - cpu_id r0 - cmp r0, #0 - addeq r2, r3, #PMC_SCRATCH38 - addne r2, r3, #PMC_SCRATCH39 - mov r12, #0 - str r12, [r2] - ret lr -ENDPROC(tegra_pen_unlock) - -/* - * tegra20_cpu_clear_resettable(void) - * - * Called to clear the "resettable soon" flag in IRAM variable when - * it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_clear_resettable) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_NOT_RESETTABLE - strb r12, [r1, r2] - ret lr -ENDPROC(tegra20_cpu_clear_resettable) - -/* - * tegra20_cpu_set_resettable_soon(void) - * - * Called to set the "resettable soon" flag in IRAM variable when - * it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_set_resettable_soon) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - mov r12, #CPU_RESETTABLE_SOON - strb r12, [r1, r2] - ret lr -ENDPROC(tegra20_cpu_set_resettable_soon) - -/* - * tegra20_cpu_is_resettable_soon(void) - * - * Returns true if the "resettable soon" flag in IRAM variable has been - * set because it is expected that the secondary CPU will be idle soon. - */ -ENTRY(tegra20_cpu_is_resettable_soon) - mov32 r1, TEGRA_IRAM_RESET_BASE_VIRT - ldr r2, =__tegra20_cpu1_resettable_status_offset - ldrb r12, [r1, r2] - cmp r12, #CPU_RESETTABLE_SOON - moveq r0, #1 - movne r0, #0 - ret lr -ENDPROC(tegra20_cpu_is_resettable_soon) - -/* - * tegra20_sleep_core_finish(unsigned long v2p) - * - * Enters suspend in LP0 or LP1 by turning off the mmu and jumping to - * tegra20_tear_down_core in IRAM - */ -ENTRY(tegra20_sleep_core_finish) - mov r4, r0 - /* Flush, disable the L1 data cache and exit SMP */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - mov r0, r4 - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - - mov32 r0, tegra20_tear_down_core - mov32 r1, tegra20_iram_start - sub r0, r0, r1 - mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA - add r0, r0, r1 - - ret r3 -ENDPROC(tegra20_sleep_core_finish) - -/* - * tegra20_sleep_cpu_secondary_finish(unsigned long v2p) - * - * Enters WFI on secondary CPU by exiting coherency. 
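tegra_pen_lock above is the classic two-party Peterson's algorithm, made safe here only because the PMC scratch registers are strongly-ordered device memory, so the plain loads and stores plus dsb give the ordering the algorithm needs. In textbook C form, over two flag words and a turn word (the assembly stores the inverted turn, !turn = cpu, which is the same thing as turn = other; on ordinary cached SMP memory these would need real barriers or atomics):

#include <stdint.h>

/* PMC_SCRATCH38/39 play the role of flag[0]/flag[1]; PMC_SCRATCH37
 * holds the (inverted) turn word. */
static volatile uint32_t flag[2];
static volatile uint32_t turn;

static void pen_lock(int cpu)           /* cpu is 0 or 1 */
{
    int other = 1 - cpu;

    flag[cpu] = 1;                      /* I want to enter */
    turn = (uint32_t)other;             /* ...but yield first */
    while (flag[other] && turn == (uint32_t)other)
        ;                               /* spin */
}

static void pen_unlock(int cpu)
{
    flag[cpu] = 0;
}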
- */ -ENTRY(tegra20_sleep_cpu_secondary_finish) - stmfd sp!, {r4-r11, lr} - - mrc p15, 0, r11, c1, c0, 1 @ save actlr before exiting coherency - - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_LOUIS - bl tegra_disable_clean_inv_dcache - - mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT - ldr r4, =__tegra20_cpu1_resettable_status_offset - mov r3, #CPU_RESETTABLE - strb r3, [r0, r4] - - bl tegra_cpu_do_idle - - /* - * cpu may be reset while in wfi, which will return through - * tegra_resume to cpu_resume - * or interrupt may wake wfi, which will return here - * cpu state is unchanged - MMU is on, cache is on, coherency - * is off, and the data cache is off - * - * r11 contains the original actlr - */ - - bl tegra_pen_lock - - mov32 r0, TEGRA_IRAM_RESET_BASE_VIRT - ldr r4, =__tegra20_cpu1_resettable_status_offset - mov r3, #CPU_NOT_RESETTABLE - strb r3, [r0, r4] - - bl tegra_pen_unlock - - /* Re-enable the data cache */ - mrc p15, 0, r10, c1, c0, 0 - orr r10, r10, #CR_C - mcr p15, 0, r10, c1, c0, 0 - isb - - mcr p15, 0, r11, c1, c0, 1 @ reenable coherency - - /* Invalidate the TLBs & BTAC */ - mov r1, #0 - mcr p15, 0, r1, c8, c3, 0 @ invalidate shared TLBs - mcr p15, 0, r1, c7, c1, 6 @ invalidate shared BTAC - dsb - isb - - /* the cpu was running with coherency disabled, so - * caches may be out of date */ - bl v7_flush_kern_cache_louis - - ldmfd sp!, {r4 - r11, pc} -ENDPROC(tegra20_sleep_cpu_secondary_finish) - -/* - * tegra20_tear_down_cpu - * - * Switches the CPU cluster to PLL-P and enters sleep. - */ -ENTRY(tegra20_tear_down_cpu) - bl tegra_switch_cpu_to_pllp - b tegra20_enter_sleep -ENDPROC(tegra20_tear_down_cpu) - -/* START OF ROUTINES COPIED TO IRAM */ - .align L1_CACHE_SHIFT - .globl tegra20_iram_start -tegra20_iram_start: - -/* - * tegra20_lp1_reset - * - * reset vector for LP1 restore; copied into IRAM during suspend. - * Brings the system back up to a safe starting point (SDRAM out of - * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLP, - * system clock running on the same PLL that it suspended at), and - * jumps to tegra_resume to restore virtual addressing and PLLX. - * The physical address of tegra_resume is expected to be stored in - * PMC_SCRATCH41. - * - * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_LPx_RESUME_AREA. - */ -ENTRY(tegra20_lp1_reset) - /* - * The CPU and system bus are running at 32KHz and executing from - * IRAM when this code is executed; immediately switch to CLKM and - * enable PLLM, PLLP, PLLC. 
- */ - mov32 r0, TEGRA_CLK_RESET_BASE - - mov r1, #(1 << 28) - str r1, [r0, #CLK_RESET_SCLK_BURST] - str r1, [r0, #CLK_RESET_CCLK_BURST] - mov r1, #0 - str r1, [r0, #CLK_RESET_CCLK_DIVIDER] - str r1, [r0, #CLK_RESET_SCLK_DIVIDER] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE - pll_enable r1, r0, CLK_RESET_PLLP_BASE - pll_enable r1, r0, CLK_RESET_PLLC_BASE - - adr r2, tegra20_sdram_pad_address - adr r4, tegra20_sdram_pad_save - mov r5, #0 - - ldr r6, tegra20_sdram_pad_size -padload: - ldr r7, [r2, r5] @ r7 is the addr in the pad_address - - ldr r1, [r4, r5] - str r1, [r7] @ restore the value in pad_save - - add r5, r5, #4 - cmp r6, r5 - bne padload - -padload_done: - /* 255uS delay for PLL stabilization */ - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #0xff - wait_until r1, r7, r9 - - adr r4, tegra20_sclk_save - ldr r4, [r4] - str r4, [r0, #CLK_RESET_SCLK_BURST] - mov32 r4, ((1 << 28) | (4)) @ burst policy is PLLP - str r4, [r0, #CLK_RESET_CCLK_BURST] - - mov32 r0, TEGRA_EMC_BASE - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 31) @ disable DRAM_CLK_STOP - str r1, [r0, #EMC_CFG] - - mov r1, #0 - str r1, [r0, #EMC_SELF_REF] @ take DRAM out of self refresh - mov r1, #1 - str r1, [r0, #EMC_NOP] - str r1, [r0, #EMC_NOP] - - emc_device_mask r1, r0 - -exit_selfrefresh_loop: - ldr r2, [r0, #EMC_EMC_STATUS] - ands r2, r2, r1 - bne exit_selfrefresh_loop - - mov r1, #0 @ unstall all transactions - str r1, [r0, #EMC_REQ_CTRL] - - mov32 r0, TEGRA_PMC_BASE - ldr r0, [r0, #PMC_SCRATCH41] - ret r0 @ jump to tegra_resume -ENDPROC(tegra20_lp1_reset) - -/* - * tegra20_tear_down_core - * - * copied into and executed from IRAM - * puts memory in self-refresh for LP0 and LP1 - */ -tegra20_tear_down_core: - bl tegra20_sdram_self_refresh - bl tegra20_switch_cpu_to_clk32k - b tegra20_enter_sleep - -/* - * tegra20_switch_cpu_to_clk32k - * - * In LP0 and LP1 all PLLs will be turned off. Switch the CPU and system clock - * to the 32KHz clock. - */ -tegra20_switch_cpu_to_clk32k: - /* - * start by switching to CLKM to safely disable PLLs, then switch to - * CLKS. 
- */ - mov r0, #(1 << 28) - str r0, [r5, #CLK_RESET_SCLK_BURST] - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - str r0, [r5, #CLK_RESET_SCLK_DIVIDER] - - /* 2us delay between changing SCLK and disabling PLLs */ - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - - /* disable PLLM, PLLP and PLLC */ - ldr r0, [r5, #CLK_RESET_PLLM_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLM_BASE] - ldr r0, [r5, #CLK_RESET_PLLP_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLP_BASE] - ldr r0, [r5, #CLK_RESET_PLLC_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLC_BASE] - - /* switch to CLKS */ - mov r0, #0 /* burst policy = 32KHz */ - str r0, [r5, #CLK_RESET_SCLK_BURST] - - ret lr - -/* - * tegra20_enter_sleep - * - * uses flow controller to enter sleep state - * executes from IRAM with SDRAM in selfrefresh when target state is LP0 or LP1 - * executes from SDRAM when target state is LP2 - */ -tegra20_enter_sleep: - mov32 r6, TEGRA_FLOW_CTRL_BASE - - mov r0, #FLOW_CTRL_WAIT_FOR_INTERRUPT - orr r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ - cpu_id r1 - cpu_to_halt_reg r1, r1 - str r0, [r6, r1] - dsb - ldr r0, [r6, r1] /* memory barrier */ - -halted: - dsb - wfe /* CPU should be power gated here */ - isb - b halted - -/* - * tegra20_sdram_self_refresh - * - * called with MMU off and caches disabled - * puts sdram in self refresh - * must be executed from IRAM - */ -tegra20_sdram_self_refresh: - mov32 r1, TEGRA_EMC_BASE @ r1 reserved for emc base addr - - mov r2, #3 - str r2, [r1, #EMC_REQ_CTRL] @ stall incoming DRAM requests - -emcidle: - ldr r2, [r1, #EMC_EMC_STATUS] - tst r2, #4 - beq emcidle - - mov r2, #1 - str r2, [r1, #EMC_SELF_REF] - - emc_device_mask r2, r1 - -emcself: - ldr r3, [r1, #EMC_EMC_STATUS] - and r3, r3, r2 - cmp r3, r2 - bne emcself @ loop until DDR in self-refresh - - adr r2, tegra20_sdram_pad_address - adr r3, tegra20_sdram_pad_safe - adr r4, tegra20_sdram_pad_save - mov r5, #0 - - ldr r6, tegra20_sdram_pad_size -padsave: - ldr r0, [r2, r5] @ r0 is the addr in the pad_address - - ldr r1, [r0] - str r1, [r4, r5] @ save the content of the addr - - ldr r1, [r3, r5] - str r1, [r0] @ set the save val to the addr - - add r5, r5, #4 - cmp r6, r5 - bne padsave -padsave_done: - - mov32 r5, TEGRA_CLK_RESET_BASE - ldr r0, [r5, #CLK_RESET_SCLK_BURST] - adr r2, tegra20_sclk_save - str r0, [r2] - dsb - ret lr - -tegra20_sdram_pad_address: - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CLKCFGPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2COMPPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2VTTGENPADCTRL - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL2 - .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGDPADCTRL2 - -tegra20_sdram_pad_size: - .word tegra20_sdram_pad_size - tegra20_sdram_pad_address - -tegra20_sdram_pad_safe: - .word 0x8 - .word 0x8 - .word 0x0 - .word 0x8 - .word 0x5500 - .word 0x08080040 - .word 0x0 - -tegra20_sclk_save: - .word 0x0 - -tegra20_sdram_pad_save: - .rept (tegra20_sdram_pad_size - tegra20_sdram_pad_address) / 4 - .long 0 - .endr - - .ltorg -/* dummy symbol for end of IRAM */ - .align L1_CACHE_SHIFT - .globl tegra20_iram_end -tegra20_iram_end: - b . 
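The padsave/padload loops above walk three parallel tables: the pad register addresses, the "safe" values to program while SDRAM sits in self-refresh, and a scratch area that captures the live values for tegra20_lp1_reset to restore. The same data structure in C (types and names are illustrative):

#include <stdint.h>
#include <stddef.h>

struct pad_save_set {
    volatile uint32_t *const *addr; /* registers to touch            */
    const uint32_t *safe;           /* values to program for sleep   */
    uint32_t *save;                 /* live values captured at entry */
    size_t n;
};

static void pads_enter_sleep(struct pad_save_set *s)      /* ~padsave */
{
    for (size_t i = 0; i < s->n; i++) {
        s->save[i] = *s->addr[i];   /* save current value  */
        *s->addr[i] = s->safe[i];   /* program sleep value */
    }
}

static void pads_exit_sleep(const struct pad_save_set *s) /* ~padload */
{
    for (size_t i = 0; i < s->n; i++)
        *s->addr[i] = s->save[i];   /* restore saved value */
}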
-#endif diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S deleted file mode 100644 index 6922dd8d3e2d95232d95c70fea25da69d6715632..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ /dev/null @@ -1,834 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2012, NVIDIA Corporation. All rights reserved. - */ - -#include - -#include -#include - -#include -#include -#include - -#include "irammap.h" -#include "sleep.h" - -#define EMC_CFG 0xc -#define EMC_ADR_CFG 0x10 -#define EMC_TIMING_CONTROL 0x28 -#define EMC_NOP 0xdc -#define EMC_SELF_REF 0xe0 -#define EMC_MRW 0xe8 -#define EMC_FBIO_CFG5 0x104 -#define EMC_AUTO_CAL_CONFIG 0x2a4 -#define EMC_AUTO_CAL_INTERVAL 0x2a8 -#define EMC_AUTO_CAL_STATUS 0x2ac -#define EMC_REQ_CTRL 0x2b0 -#define EMC_CFG_DIG_DLL 0x2bc -#define EMC_EMC_STATUS 0x2b4 -#define EMC_ZCAL_INTERVAL 0x2e0 -#define EMC_ZQ_CAL 0x2ec -#define EMC_XM2VTTGENPADCTRL 0x310 -#define EMC_XM2VTTGENPADCTRL2 0x314 - -#define PMC_CTRL 0x0 -#define PMC_CTRL_SIDE_EFFECT_LP0 (1 << 14) /* enter LP0 when CPU pwr gated */ - -#define PMC_PLLP_WB0_OVERRIDE 0xf8 -#define PMC_IO_DPD_REQ 0x1b8 -#define PMC_IO_DPD_STATUS 0x1bc - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 -#define CLK_RESET_SCLK_BURST 0x28 -#define CLK_RESET_SCLK_DIVIDER 0x2c - -#define CLK_RESET_PLLC_BASE 0x80 -#define CLK_RESET_PLLC_MISC 0x8c -#define CLK_RESET_PLLM_BASE 0x90 -#define CLK_RESET_PLLM_MISC 0x9c -#define CLK_RESET_PLLP_BASE 0xa0 -#define CLK_RESET_PLLP_MISC 0xac -#define CLK_RESET_PLLA_BASE 0xb0 -#define CLK_RESET_PLLA_MISC 0xbc -#define CLK_RESET_PLLX_BASE 0xe0 -#define CLK_RESET_PLLX_MISC 0xe4 -#define CLK_RESET_PLLX_MISC3 0x518 -#define CLK_RESET_PLLX_MISC3_IDDQ 3 -#define CLK_RESET_PLLM_MISC_IDDQ 5 -#define CLK_RESET_PLLC_MISC_IDDQ 26 - -#define CLK_RESET_CLK_SOURCE_MSELECT 0x3b4 - -#define MSELECT_CLKM (0x3 << 30) - -#define LOCK_DELAY 50 /* safety delay after lock is detected */ - -#define TEGRA30_POWER_HOTPLUG_SHUTDOWN (1 << 27) /* Hotplug shutdown */ - -.macro emc_device_mask, rd, base - ldr \rd, [\base, #EMC_ADR_CFG] - tst \rd, #0x1 - moveq \rd, #(0x1 << 8) @ just 1 device - movne \rd, #(0x3 << 8) @ 2 devices -.endm - -.macro emc_timing_update, rd, base - mov \rd, #1 - str \rd, [\base, #EMC_TIMING_CONTROL] -1001: - ldr \rd, [\base, #EMC_EMC_STATUS] - tst \rd, #(0x1<<23) @ wait EMC_STATUS_TIMING_UPDATE_STALLED is clear - bne 1001b -.endm - -.macro pll_enable, rd, r_car_base, pll_base, pll_misc - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 30) - orreq \rd, \rd, #(1 << 30) - streq \rd, [\r_car_base, #\pll_base] - /* Enable lock detector */ - .if \pll_misc - ldr \rd, [\r_car_base, #\pll_misc] - bic \rd, \rd, #(1 << 18) - str \rd, [\r_car_base, #\pll_misc] - ldr \rd, [\r_car_base, #\pll_misc] - ldr \rd, [\r_car_base, #\pll_misc] - orr \rd, \rd, #(1 << 18) - str \rd, [\r_car_base, #\pll_misc] - .endif -.endm - -.macro pll_locked, rd, r_car_base, pll_base -1: - ldr \rd, [\r_car_base, #\pll_base] - tst \rd, #(1 << 27) - beq 1b -.endm - -.macro pll_iddq_exit, rd, car, iddq, iddq_bit - ldr \rd, [\car, #\iddq] - bic \rd, \rd, #(1<<\iddq_bit) - str \rd, [\car, #\iddq] -.endm - -.macro pll_iddq_entry, rd, car, iddq, iddq_bit - ldr \rd, [\car, #\iddq] - orr \rd, \rd, #(1<<\iddq_bit) - str \rd, [\car, #\iddq] -.endm - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra30_hotplug_shutdown(void) - * - * Powergates the current CPU. - * Should never return. 
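The pll_enable, pll_locked and emc_timing_update macros above are all variants of a read-modify-write followed by a status poll. Their C equivalents, using the offsets and bit positions #defined and commented in this file (the register helper is illustrative):

#include <stdint.h>

#define EMC_TIMING_CONTROL 0x28
#define EMC_EMC_STATUS     0x2b4
#define PLL_ENABLE_BIT     (1u << 30)
#define PLL_LOCK_BIT       (1u << 27)
#define EMC_TIMING_STALLED (1u << 23)   /* EMC_STATUS_TIMING_UPDATE_STALLED */

static inline volatile uint32_t *reg(uintptr_t base, uint32_t off)
{
    return (volatile uint32_t *)(base + off);
}

static void pll_enable(uintptr_t car, uint32_t pll_base)
{
    if (!(*reg(car, pll_base) & PLL_ENABLE_BIT))
        *reg(car, pll_base) |= PLL_ENABLE_BIT;
}

static void pll_locked(uintptr_t car, uint32_t pll_base)
{
    while (!(*reg(car, pll_base) & PLL_LOCK_BIT))
        ;                       /* spin until the PLL reports lock */
}

static void emc_timing_update(uintptr_t emc)
{
    *reg(emc, EMC_TIMING_CONTROL) = 1;
    while (*reg(emc, EMC_EMC_STATUS) & EMC_TIMING_STALLED)
        ;                       /* wait for the update to latch */
}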
- */ -ENTRY(tegra30_hotplug_shutdown) - /* Powergate this CPU */ - mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN - bl tegra30_cpu_shutdown - ret lr @ should never get here -ENDPROC(tegra30_hotplug_shutdown) - -/* - * tegra30_cpu_shutdown(unsigned long flags) - * - * Puts the current CPU in wait-for-event mode on the flow controller - * and powergates it -- flags (in R0) indicate the request type. - * - * r10 = SoC ID - * corrupts r0-r4, r10-r12 - */ -ENTRY(tegra30_cpu_shutdown) - cpu_id r3 - tegra_get_soc_id TEGRA_APB_MISC_VIRT, r10 - cmp r10, #TEGRA30 - bne _no_cpu0_chk @ It's not Tegra30 - - cmp r3, #0 - reteq lr @ Must never be called for CPU 0 -_no_cpu0_chk: - - ldr r12, =TEGRA_FLOW_CTRL_VIRT - cpu_to_csr_reg r1, r3 - add r1, r1, r12 @ virtual CSR address for this CPU - cpu_to_halt_reg r2, r3 - add r2, r2, r12 @ virtual HALT_EVENTS address for this CPU - - /* - * Clear this CPU's "event" and "interrupt" flags and power gate - * it when halting but not before it is in the "WFE" state. - */ - movw r12, \ - FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG | \ - FLOW_CTRL_CSR_ENABLE - cmp r10, #TEGRA30 - moveq r4, #(1 << 4) @ wfe bitmap - movne r4, #(1 << 8) @ wfi bitmap - ARM( orr r12, r12, r4, lsl r3 ) - THUMB( lsl r4, r4, r3 ) - THUMB( orr r12, r12, r4 ) - str r12, [r1] - - /* Halt this CPU. */ - mov r3, #0x400 -delay_1: - subs r3, r3, #1 @ delay as a part of wfe war. - bge delay_1; - cpsid a @ disable imprecise aborts. - ldr r3, [r1] @ read CSR - str r3, [r1] @ clear CSR - - tst r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN - beq flow_ctrl_setting_for_lp2 - - /* flow controller set up for hotplug */ - mov r3, #FLOW_CTRL_WAITEVENT @ For hotplug - b flow_ctrl_done -flow_ctrl_setting_for_lp2: - /* flow controller set up for LP2 */ - cmp r10, #TEGRA30 - moveq r3, #FLOW_CTRL_WAIT_FOR_INTERRUPT @ For LP2 - movne r3, #FLOW_CTRL_WAITEVENT - orrne r3, r3, #FLOW_CTRL_HALT_GIC_IRQ - orrne r3, r3, #FLOW_CTRL_HALT_GIC_FIQ -flow_ctrl_done: - cmp r10, #TEGRA30 - str r3, [r2] - ldr r0, [r2] - b wfe_war - -__cpu_reset_again: - dsb - .align 5 - wfeeq @ CPU should be power gated here - wfine -wfe_war: - b __cpu_reset_again - - /* - * 38 nops, which fills rest of wfe cache line and - * 4 more cachelines with nop - */ - .rept 38 - nop - .endr - b . @ should never get here - -ENDPROC(tegra30_cpu_shutdown) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra30_sleep_core_finish(unsigned long v2p) - * - * Enters suspend in LP0 or LP1 by turning off the MMU and jumping to - * tegra30_tear_down_core in IRAM - */ -ENTRY(tegra30_sleep_core_finish) - mov r4, r0 - /* Flush, disable the L1 data cache and exit SMP */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - mov r0, r4 - - /* - * Preload all the address literals that are needed for the - * CPU power-gating process, to avoid loads from SDRAM, which - * are not supported once SDRAM is put into self-refresh. - * LP0 / LP1 use physical addresses, since the MMU needs to be - * disabled before putting SDRAM into self-refresh to avoid - * memory access due to page table walks. 
- */ - mov32 r4, TEGRA_PMC_BASE - mov32 r5, TEGRA_CLK_RESET_BASE - mov32 r6, TEGRA_FLOW_CTRL_BASE - mov32 r7, TEGRA_TMRUS_BASE - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - - mov32 r0, tegra30_tear_down_core - mov32 r1, tegra30_iram_start - sub r0, r0, r1 - mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA - add r0, r0, r1 - - ret r3 -ENDPROC(tegra30_sleep_core_finish) - -/* - * tegra30_sleep_cpu_secondary_finish(unsigned long v2p) - * - * Enters LP2 on secondary CPU by exiting coherency and powergating the CPU. - */ -ENTRY(tegra30_sleep_cpu_secondary_finish) - mov r7, lr - - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_LOUIS - bl tegra_disable_clean_inv_dcache - - /* Powergate this CPU. */ - mov r0, #0 @ power mode flags (!hotplug) - bl tegra30_cpu_shutdown - mov r0, #1 @ never return here - ret r7 -ENDPROC(tegra30_sleep_cpu_secondary_finish) - -/* - * tegra30_tear_down_cpu - * - * Switches the CPU to enter sleep. - */ -ENTRY(tegra30_tear_down_cpu) - mov32 r6, TEGRA_FLOW_CTRL_BASE - - b tegra30_enter_sleep -ENDPROC(tegra30_tear_down_cpu) - -/* START OF ROUTINES COPIED TO IRAM */ - .align L1_CACHE_SHIFT - .globl tegra30_iram_start -tegra30_iram_start: - -/* - * tegra30_lp1_reset - * - * reset vector for LP1 restore; copied into IRAM during suspend. - * Brings the system back up to a safe starting point (SDRAM out of - * self-refresh, PLLC, PLLM and PLLP reenabled, CPU running on PLLX, - * system clock running on the same PLL that it suspended at), and - * jumps to tegra_resume to restore virtual addressing. - * The physical address of tegra_resume is expected to be stored in - * PMC_SCRATCH41. - * - * NOTE: THIS *MUST* BE RELOCATED TO TEGRA_IRAM_LPx_RESUME_AREA. - */ -ENTRY(tegra30_lp1_reset) - /* - * The CPU and system bus are running at 32KHz and executing from - * IRAM when this code is executed; immediately switch to CLKM and - * enable PLLP, PLLM, PLLC, PLLA and PLLX. 
- */ - mov32 r0, TEGRA_CLK_RESET_BASE - - mov r1, #(1 << 28) - str r1, [r0, #CLK_RESET_SCLK_BURST] - str r1, [r0, #CLK_RESET_CCLK_BURST] - mov r1, #0 - str r1, [r0, #CLK_RESET_CCLK_DIVIDER] - str r1, [r0, #CLK_RESET_SCLK_DIVIDER] - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - beq _no_pll_iddq_exit - - pll_iddq_exit r1, r0, CLK_RESET_PLLM_MISC, CLK_RESET_PLLM_MISC_IDDQ - pll_iddq_exit r1, r0, CLK_RESET_PLLC_MISC, CLK_RESET_PLLC_MISC_IDDQ - pll_iddq_exit r1, r0, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ - - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r3 - - /* enable PLLM via PMC */ - mov32 r2, TEGRA_PMC_BASE - ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - orr r1, r1, #(1 << 12) - str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE, 0 - pll_enable r1, r0, CLK_RESET_PLLC_BASE, 0 - pll_enable r1, r0, CLK_RESET_PLLX_BASE, 0 - - b _pll_m_c_x_done - -_no_pll_iddq_exit: - /* enable PLLM via PMC */ - mov32 r2, TEGRA_PMC_BASE - ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - orr r1, r1, #(1 << 12) - str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] - - pll_enable r1, r0, CLK_RESET_PLLM_BASE, CLK_RESET_PLLM_MISC - pll_enable r1, r0, CLK_RESET_PLLC_BASE, CLK_RESET_PLLC_MISC - pll_enable r1, r0, CLK_RESET_PLLX_BASE, CLK_RESET_PLLX_MISC - -_pll_m_c_x_done: - pll_enable r1, r0, CLK_RESET_PLLP_BASE, CLK_RESET_PLLP_MISC - pll_enable r1, r0, CLK_RESET_PLLA_BASE, CLK_RESET_PLLA_MISC - - pll_locked r1, r0, CLK_RESET_PLLM_BASE - pll_locked r1, r0, CLK_RESET_PLLP_BASE - pll_locked r1, r0, CLK_RESET_PLLA_BASE - pll_locked r1, r0, CLK_RESET_PLLC_BASE - pll_locked r1, r0, CLK_RESET_PLLX_BASE - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 - cmp r1, #TEGRA30 - beq 1f - ldr r1, [r0, #CLK_RESET_PLLP_BASE] - bic r1, r1, #(1<<31) @ disable PllP bypass - str r1, [r0, #CLK_RESET_PLLP_BASE] -1: - - mov32 r7, TEGRA_TMRUS_BASE - ldr r1, [r7] - add r1, r1, #LOCK_DELAY - wait_until r1, r7, r3 - - adr r5, tegra_sdram_pad_save - - ldr r4, [r5, #0x18] @ restore CLK_SOURCE_MSELECT - str r4, [r0, #CLK_RESET_CLK_SOURCE_MSELECT] - - ldr r4, [r5, #0x1C] @ restore SCLK_BURST - str r4, [r0, #CLK_RESET_SCLK_BURST] - - cmp r10, #TEGRA30 - movweq r4, #:lower16:((1 << 28) | (0x8)) @ burst policy is PLLX - movteq r4, #:upper16:((1 << 28) | (0x8)) - movwne r4, #:lower16:((1 << 28) | (0xe)) - movtne r4, #:upper16:((1 << 28) | (0xe)) - str r4, [r0, #CLK_RESET_CCLK_BURST] - - /* Restore pad power state to normal */ - ldr r1, [r5, #0x14] @ PMC_IO_DPD_STATUS - mvn r1, r1 - bic r1, r1, #(1 << 31) - orr r1, r1, #(1 << 30) - str r1, [r2, #PMC_IO_DPD_REQ] @ DPD_OFF - - cmp r10, #TEGRA30 - movweq r0, #:lower16:TEGRA_EMC_BASE @ r0 reserved for emc base - movteq r0, #:upper16:TEGRA_EMC_BASE - cmp r10, #TEGRA114 - movweq r0, #:lower16:TEGRA_EMC0_BASE - movteq r0, #:upper16:TEGRA_EMC0_BASE - cmp r10, #TEGRA124 - movweq r0, #:lower16:TEGRA124_EMC_BASE - movteq r0, #:upper16:TEGRA124_EMC_BASE - -exit_self_refresh: - ldr r1, [r5, #0xC] @ restore EMC_XM2VTTGENPADCTRL - str r1, [r0, #EMC_XM2VTTGENPADCTRL] - ldr r1, [r5, #0x10] @ restore EMC_XM2VTTGENPADCTRL2 - str r1, [r0, #EMC_XM2VTTGENPADCTRL2] - ldr r1, [r5, #0x8] @ restore EMC_AUTO_CAL_INTERVAL - str r1, [r0, #EMC_AUTO_CAL_INTERVAL] - - /* Relock DLL */ - ldr r1, [r0, #EMC_CFG_DIG_DLL] - orr r1, r1, #(1 << 30) @ set DLL_RESET - str r1, [r0, #EMC_CFG_DIG_DLL] - - emc_timing_update r1, r0 - - cmp r10, #TEGRA114 - movweq r1, #:lower16:TEGRA_EMC1_BASE - movteq r1, #:upper16:TEGRA_EMC1_BASE - cmpeq r0, r1 - - ldr r1, [r0, #EMC_AUTO_CAL_CONFIG] - orr r1, 
r1, #(1 << 31) @ set AUTO_CAL_ACTIVE - orreq r1, r1, #(1 << 27) @ set slave mode for channel 1 - str r1, [r0, #EMC_AUTO_CAL_CONFIG] - -emc_wait_auto_cal_onetime: - ldr r1, [r0, #EMC_AUTO_CAL_STATUS] - tst r1, #(1 << 31) @ wait until AUTO_CAL_ACTIVE is cleared - bne emc_wait_auto_cal_onetime - - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 31) @ disable DRAM_CLK_STOP_PD - str r1, [r0, #EMC_CFG] - - mov r1, #0 - str r1, [r0, #EMC_SELF_REF] @ take DRAM out of self refresh - mov r1, #1 - cmp r10, #TEGRA30 - streq r1, [r0, #EMC_NOP] - streq r1, [r0, #EMC_NOP] - - emc_device_mask r1, r0 - -exit_selfrefresh_loop: - ldr r2, [r0, #EMC_EMC_STATUS] - ands r2, r2, r1 - bne exit_selfrefresh_loop - - lsr r1, r1, #8 @ devSel, bit0:dev0, bit1:dev1 - - mov32 r7, TEGRA_TMRUS_BASE - ldr r2, [r0, #EMC_FBIO_CFG5] - - and r2, r2, #3 @ check DRAM_TYPE - cmp r2, #2 - beq emc_lpddr2 - - /* Issue a ZQ_CAL for dev0 - DDR3 */ - mov32 r2, 0x80000011 @ DEV_SELECTION=2, LENGTH=LONG, CMD=1 - str r2, [r0, #EMC_ZQ_CAL] - ldr r2, [r7] - add r2, r2, #10 - wait_until r2, r7, r3 - - tst r1, #2 - beq zcal_done - - /* Issue a ZQ_CAL for dev1 - DDR3 */ - mov32 r2, 0x40000011 @ DEV_SELECTION=1, LENGTH=LONG, CMD=1 - str r2, [r0, #EMC_ZQ_CAL] - ldr r2, [r7] - add r2, r2, #10 - wait_until r2, r7, r3 - b zcal_done - -emc_lpddr2: - /* Issue a ZQ_CAL for dev0 - LPDDR2 */ - mov32 r2, 0x800A00AB @ DEV_SELECTION=2, MA=10, OP=0xAB - str r2, [r0, #EMC_MRW] - ldr r2, [r7] - add r2, r2, #1 - wait_until r2, r7, r3 - - tst r1, #2 - beq zcal_done - - /* Issue a ZQ_CAL for dev1 - LPDDR2 */ - mov32 r2, 0x400A00AB @ DEV_SELECTION=1, MA=10, OP=0xAB - str r2, [r0, #EMC_MRW] - ldr r2, [r7] - add r2, r2, #1 - wait_until r2, r7, r3 - -zcal_done: - mov r1, #0 @ unstall all transactions - str r1, [r0, #EMC_REQ_CTRL] - ldr r1, [r5, #0x4] @ restore EMC_ZCAL_INTERVAL - str r1, [r0, #EMC_ZCAL_INTERVAL] - ldr r1, [r5, #0x0] @ restore EMC_CFG - str r1, [r0, #EMC_CFG] - - emc_timing_update r1, r0 - - /* Tegra114 has dual EMC channels; now configure the other one */ - cmp r10, #TEGRA114 - bne __no_dual_emc_chanl - mov32 r1, TEGRA_EMC1_BASE - cmp r0, r1 - movne r0, r1 - addne r5, r5, #0x20 - bne exit_self_refresh -__no_dual_emc_chanl: - - mov32 r0, TEGRA_PMC_BASE - ldr r0, [r0, #PMC_SCRATCH41] - ret r0 @ jump to tegra_resume -ENDPROC(tegra30_lp1_reset) - - .align L1_CACHE_SHIFT -tegra30_sdram_pad_address: - .word TEGRA_EMC_BASE + EMC_CFG @0x0 - .word TEGRA_EMC_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA_EMC_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA_EMC_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c -tegra30_sdram_pad_address_end: - -tegra114_sdram_pad_address: - .word TEGRA_EMC0_BASE + EMC_CFG @0x0 - .word TEGRA_EMC0_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA_EMC0_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA_EMC0_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c - .word TEGRA_EMC1_BASE + EMC_CFG @0x20 - .word TEGRA_EMC1_BASE + EMC_ZCAL_INTERVAL @0x24 - .word TEGRA_EMC1_BASE + EMC_AUTO_CAL_INTERVAL @0x28 - .word TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL @0x2c - .word TEGRA_EMC1_BASE + EMC_XM2VTTGENPADCTRL2 @0x30 -tegra114_sdram_pad_adress_end: - 
-tegra124_sdram_pad_address: - .word TEGRA124_EMC_BASE + EMC_CFG @0x0 - .word TEGRA124_EMC_BASE + EMC_ZCAL_INTERVAL @0x4 - .word TEGRA124_EMC_BASE + EMC_AUTO_CAL_INTERVAL @0x8 - .word TEGRA124_EMC_BASE + EMC_XM2VTTGENPADCTRL @0xc - .word TEGRA124_EMC_BASE + EMC_XM2VTTGENPADCTRL2 @0x10 - .word TEGRA_PMC_BASE + PMC_IO_DPD_STATUS @0x14 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_CLK_SOURCE_MSELECT @0x18 - .word TEGRA_CLK_RESET_BASE + CLK_RESET_SCLK_BURST @0x1c -tegra124_sdram_pad_address_end: - -tegra30_sdram_pad_size: - .word tegra30_sdram_pad_address_end - tegra30_sdram_pad_address - -tegra114_sdram_pad_size: - .word tegra114_sdram_pad_address_end - tegra114_sdram_pad_address - - .type tegra_sdram_pad_save, %object -tegra_sdram_pad_save: - .rept (tegra114_sdram_pad_address_end - tegra114_sdram_pad_address) / 4 - .long 0 - .endr - -/* - * tegra30_tear_down_core - * - * copied into and executed from IRAM - * puts memory in self-refresh for LP0 and LP1 - */ -tegra30_tear_down_core: - bl tegra30_sdram_self_refresh - bl tegra30_switch_cpu_to_clk32k - b tegra30_enter_sleep - -/* - * tegra30_switch_cpu_to_clk32k - * - * In LP0 and LP1 all PLLs will be turned off. Switch the CPU and system clocks - * to the 32KHz clock. - * r4 = TEGRA_PMC_BASE - * r5 = TEGRA_CLK_RESET_BASE - * r6 = TEGRA_FLOW_CTRL_BASE - * r7 = TEGRA_TMRUS_BASE - * r10= SoC ID - */ -tegra30_switch_cpu_to_clk32k: - /* - * start by jumping to CLKM to safely disable PLLs, then jump to - * CLKS. - */ - mov r0, #(1 << 28) - str r0, [r5, #CLK_RESET_SCLK_BURST] - /* 2us delay between changing SCLK and CCLK */ - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - str r0, [r5, #CLK_RESET_SCLK_DIVIDER] - - /* switch the clock source of mselect to be CLK_M */ - ldr r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT] - orr r0, r0, #MSELECT_CLKM - str r0, [r5, #CLK_RESET_CLK_SOURCE_MSELECT] - - /* 2us delay between changing SCLK and disabling PLLs */ - ldr r1, [r7] - add r1, r1, #2 - wait_until r1, r7, r9 - - /* disable PLLM via PMC in LP1 */ - ldr r0, [r4, #PMC_PLLP_WB0_OVERRIDE] - bic r0, r0, #(1 << 12) - str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] - - /* disable PLLP, PLLA, PLLC and PLLX */ - tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 - cmp r1, #TEGRA30 - ldr r0, [r5, #CLK_RESET_PLLP_BASE] - orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLP_BASE] - ldr r0, [r5, #CLK_RESET_PLLA_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLA_BASE] - ldr r0, [r5, #CLK_RESET_PLLC_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLC_BASE] - ldr r0, [r5, #CLK_RESET_PLLX_BASE] - bic r0, r0, #(1 << 30) - str r0, [r5, #CLK_RESET_PLLX_BASE] - - cmp r10, #TEGRA30 - beq _no_pll_in_iddq - pll_iddq_entry r1, r5, CLK_RESET_PLLX_MISC3, CLK_RESET_PLLX_MISC3_IDDQ -_no_pll_in_iddq: - - /* switch to CLKS */ - mov r0, #0 /* burst policy = 32KHz */ - str r0, [r5, #CLK_RESET_SCLK_BURST] - - ret lr - -/* - * tegra30_enter_sleep - * - * uses flow controller to enter sleep state - * executes from IRAM with SDRAM in self-refresh when target state is LP0 or LP1 - * executes from SDRAM when target state is LP2 - * r6 = TEGRA_FLOW_CTRL_BASE - */ -tegra30_enter_sleep: - cpu_id r1 - - cpu_to_csr_reg r2, r1 - ldr r0, [r6, r2] - orr r0, r0, #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG - orr r0, r0, #FLOW_CTRL_CSR_ENABLE - str r0, [r6, r2] - - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - mov r0, 
#FLOW_CTRL_WAIT_FOR_INTERRUPT - orreq r0, r0, #FLOW_CTRL_HALT_CPU_IRQ | FLOW_CTRL_HALT_CPU_FIQ - orrne r0, r0, #FLOW_CTRL_HALT_LIC_IRQ | FLOW_CTRL_HALT_LIC_FIQ - - cpu_to_halt_reg r2, r1 - str r0, [r6, r2] - dsb - ldr r0, [r6, r2] /* memory barrier */ - -halted: - isb - dsb - wfi /* CPU should be power gated here */ - - /* !!!FIXME!!! Implement halt failure handler */ - b halted - -/* - * tegra30_sdram_self_refresh - * - * called with MMU off and caches disabled - * must be executed from IRAM - * r4 = TEGRA_PMC_BASE - * r5 = TEGRA_CLK_RESET_BASE - * r6 = TEGRA_FLOW_CTRL_BASE - * r7 = TEGRA_TMRUS_BASE - * r10= SoC ID - */ -tegra30_sdram_self_refresh: - - adr r8, tegra_sdram_pad_save - tegra_get_soc_id TEGRA_APB_MISC_BASE, r10 - cmp r10, #TEGRA30 - adreq r2, tegra30_sdram_pad_address - ldreq r3, tegra30_sdram_pad_size - cmp r10, #TEGRA114 - adreq r2, tegra114_sdram_pad_address - ldreq r3, tegra114_sdram_pad_size - cmp r10, #TEGRA124 - adreq r2, tegra124_sdram_pad_address - ldreq r3, tegra30_sdram_pad_size - - mov r9, #0 - -padsave: - ldr r0, [r2, r9] @ r0 is the addr in the pad_address - - ldr r1, [r0] - str r1, [r8, r9] @ save the content of the addr - - add r9, r9, #4 - cmp r3, r9 - bne padsave -padsave_done: - - dsb - - cmp r10, #TEGRA30 - ldreq r0, =TEGRA_EMC_BASE @ r0 reserved for emc base addr - cmp r10, #TEGRA114 - ldreq r0, =TEGRA_EMC0_BASE - cmp r10, #TEGRA124 - ldreq r0, =TEGRA124_EMC_BASE - -enter_self_refresh: - cmp r10, #TEGRA30 - mov r1, #0 - str r1, [r0, #EMC_ZCAL_INTERVAL] - str r1, [r0, #EMC_AUTO_CAL_INTERVAL] - ldr r1, [r0, #EMC_CFG] - bic r1, r1, #(1 << 28) - bicne r1, r1, #(1 << 29) - str r1, [r0, #EMC_CFG] @ disable DYN_SELF_REF - - emc_timing_update r1, r0 - - ldr r1, [r7] - add r1, r1, #5 - wait_until r1, r7, r2 - -emc_wait_auto_cal: - ldr r1, [r0, #EMC_AUTO_CAL_STATUS] - tst r1, #(1 << 31) @ wait until AUTO_CAL_ACTIVE is cleared - bne emc_wait_auto_cal - - mov r1, #3 - str r1, [r0, #EMC_REQ_CTRL] @ stall incoming DRAM requests - -emcidle: - ldr r1, [r0, #EMC_EMC_STATUS] - tst r1, #4 - beq emcidle - - mov r1, #1 - str r1, [r0, #EMC_SELF_REF] - - emc_device_mask r1, r0 - -emcself: - ldr r2, [r0, #EMC_EMC_STATUS] - and r2, r2, r1 - cmp r2, r1 - bne emcself @ loop until DDR in self-refresh - - /* Put VTTGEN in the lowest power mode */ - ldr r1, [r0, #EMC_XM2VTTGENPADCTRL] - mov32 r2, 0xF8F8FFFF @ clear XM2VTTGEN_DRVUP and XM2VTTGEN_DRVDN - and r1, r1, r2 - str r1, [r0, #EMC_XM2VTTGENPADCTRL] - ldr r1, [r0, #EMC_XM2VTTGENPADCTRL2] - cmp r10, #TEGRA30 - orreq r1, r1, #7 @ set E_NO_VTTGEN - orrne r1, r1, #0x3f - str r1, [r0, #EMC_XM2VTTGENPADCTRL2] - - emc_timing_update r1, r0 - - /* Tegra114 has dual EMC channels, now configure the other one */ - cmp r10, #TEGRA114 - bne no_dual_emc_chanl - mov32 r1, TEGRA_EMC1_BASE - cmp r0, r1 - movne r0, r1 - bne enter_self_refresh -no_dual_emc_chanl: - - ldr r1, [r4, #PMC_CTRL] - tst r1, #PMC_CTRL_SIDE_EFFECT_LP0 - bne pmc_io_dpd_skip - /* - * Put DDR_DATA, DISC_ADDR_CMD, DDR_ADDR_CMD, POP_ADDR_CMD, POP_CLK - * and COMP in the lowest power mode when LP1. - */ - mov32 r1, 0x8EC00000 - str r1, [r4, #PMC_IO_DPD_REQ] -pmc_io_dpd_skip: - - dsb - - ret lr - - .ltorg -/* dummy symbol for end of IRAM */ - .align L1_CACHE_SHIFT - .global tegra30_iram_end -tegra30_iram_end: - b . 
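The wait_until invocations throughout this file busy-wait on the free-running microsecond counter at TEGRA_TMRUS_BASE: callers read the counter, add a delay in microseconds, and spin until the counter passes that deadline. The macro itself is defined in a header not shown in this diff; the expansion below is a plausible, wraparound-safe illustration of the pattern, not necessarily the kernel's exact definition:

	.macro wait_until, rdeadline, rtmrus, rtmp
1:	ldr	\rtmp, [\rtmrus]		@ current time in microseconds
	sub	\rtmp, \rtmp, \rdeadline	@ signed distance past the deadline
	cmp	\rtmp, #0
	blt	1b				@ still early, keep polling
	.endm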
-#endif diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S deleted file mode 100644 index 8f88944831c5353ef7e8206664f914a90248ca59..0000000000000000000000000000000000000000 --- a/arch/arm/mach-tegra/sleep.S +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/arm/mach-tegra/sleep.S - * - * Copyright (c) 2010-2011, NVIDIA Corporation. - * Copyright (c) 2011, Google, Inc. - * - * Author: Colin Cross - * Gary King - */ - -#include - -#include -#include -#include -#include - -#include "iomap.h" -#include "sleep.h" - -#define CLK_RESET_CCLK_BURST 0x20 -#define CLK_RESET_CCLK_DIVIDER 0x24 - -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PM_SLEEP) -/* - * tegra_disable_clean_inv_dcache - * - * disable, clean & invalidate the D-cache - * - * Corrupted registers: r1-r3, r6, r8, r9-r11 - */ -ENTRY(tegra_disable_clean_inv_dcache) - stmfd sp!, {r0, r4-r5, r7, r9-r11, lr} - dmb @ ensure ordering - - /* Disable the D-cache */ - mrc p15, 0, r2, c1, c0, 0 - tst r2, #CR_C @ see tegra_sleep_cpu() - bic r2, r2, #CR_C - mcrne p15, 0, r2, c1, c0, 0 - isb - - /* Flush the D-cache */ - cmp r0, #TEGRA_FLUSH_CACHE_ALL - blne v7_flush_dcache_louis - bleq v7_flush_dcache_all - - /* Turn off coherency */ - exit_smp r4, r5 - - ldmfd sp!, {r0, r4-r5, r7, r9-r11, pc} -ENDPROC(tegra_disable_clean_inv_dcache) -#endif - -#ifdef CONFIG_PM_SLEEP -/* - * tegra_init_l2_for_a15 - * - * set up the correct L2 cache data RAM latency - */ -ENTRY(tegra_init_l2_for_a15) - mrc p15, 0, r0, c0, c0, 5 - ubfx r0, r0, #8, #4 - tst r0, #1 @ only need for cluster 0 - bne _exit_init_l2_a15 - - mrc p15, 0x1, r0, c9, c0, 2 - and r0, r0, #7 - cmp r0, #2 - bicne r0, r0, #7 - orrne r0, r0, #2 - mcrne p15, 0x1, r0, c9, c0, 2 -_exit_init_l2_a15: - - ret lr -ENDPROC(tegra_init_l2_for_a15) - -/* - * tegra_sleep_cpu_finish(unsigned long v2p) - * - * enters suspend in LP2 by turning off the mmu and jumping to - * tegra?_tear_down_cpu - */ -ENTRY(tegra_sleep_cpu_finish) - mov r4, r0 - /* Flush and disable the L1 data cache */ - mov r0, #TEGRA_FLUSH_CACHE_ALL - bl tegra_disable_clean_inv_dcache - - mov r0, r4 - mov32 r6, tegra_tear_down_cpu - ldr r1, [r6] - add r1, r1, r0 - - mov32 r3, tegra_shut_off_mmu - add r3, r3, r0 - mov r0, r1 - - ret r3 -ENDPROC(tegra_sleep_cpu_finish) - -/* - * tegra_shut_off_mmu - * - * r0 = physical address to jump to with mmu off - * - * called with VA=PA mapping - * turns off MMU, icache, dcache and branch prediction - */ - .align L1_CACHE_SHIFT - .pushsection .idmap.text, "ax" -ENTRY(tegra_shut_off_mmu) - mrc p15, 0, r3, c1, c0, 0 - movw r2, #CR_I | CR_Z | CR_C | CR_M - bic r3, r3, r2 - dsb - mcr p15, 0, r3, c1, c0, 0 - isb -#ifdef CONFIG_CACHE_L2X0 - /* Disable L2 cache */ - check_cpu_part_num 0xc09, r9, r10 - retne r0 - - mov32 r2, TEGRA_ARM_PERIF_BASE + 0x3000 - ldr r3, [r2, #L2X0_CTRL] - tst r3, #L2X0_CTRL_EN @ see tegra_sleep_cpu() - mov r3, #0 - strne r3, [r2, #L2X0_CTRL] -#endif - ret r0 -ENDPROC(tegra_shut_off_mmu) - .popsection - -/* - * tegra_switch_cpu_to_pllp - * - * In LP2 the normal cpu clock pllx will be turned off. 
Switch the CPU to pllp - */ -ENTRY(tegra_switch_cpu_to_pllp) - /* in LP2 idle (SDRAM active), set the CPU burst policy to PLLP */ - mov32 r5, TEGRA_CLK_RESET_BASE - mov r0, #(2 << 28) @ burst policy = run mode - orr r0, r0, #(4 << 4) @ use PLLP in run mode burst - str r0, [r5, #CLK_RESET_CCLK_BURST] - mov r0, #0 - str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - ret lr -ENDPROC(tegra_switch_cpu_to_pllp) -#endif diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S deleted file mode 100644 index 0614b2ebd354c0a5c73f2cc53adac29f78c6669f..0000000000000000000000000000000000000000 --- a/arch/arm/mach-vexpress/dcscb_setup.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/include/asm/dcscb_setup.S - * - * Created by: Dave Martin, 2012-06-22 - * Copyright: (C) 2012-2013 Linaro Limited - */ - -#include - - -ENTRY(dcscb_power_up_setup) - - cmp r0, #0 @ check affinity level - beq 2f - -/* - * Enable cluster-level coherency, in preparation for turning on the MMU. - * The ACTLR SMP bit does not need to be set here, because cpu_resume() - * already restores that. - * - * A15/A7 may not require explicit L2 invalidation on reset, dependent - * on hardware integration decisions. - * For now, this code assumes that L2 is either already invalidated, - * or invalidation is not required. - */ - - b cci_enable_port_for_self - -2: @ Implementation-specific local CPU setup operations should go here, - @ if any. In this case, there is nothing to do. - - bx lr - -ENDPROC(dcscb_power_up_setup) diff --git a/arch/arm/mach-zx/headsmp.S b/arch/arm/mach-zx/headsmp.S deleted file mode 100644 index 0846859b05739bbe8ad49a717d23c62ab3abd8ef..0000000000000000000000000000000000000000 --- a/arch/arm/mach-zx/headsmp.S +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2014 Linaro Ltd. - * Copyright (C) 2014 ZTE Corporation. - */ - -#include - - .align 3 - .arm - -/* It runs from physical address */ -ENTRY(zx_resume_jump) - adr r1, zx_secondary_startup_pa - ldr r0, [r1] - bx r0 -ENDPROC(zx_resume_jump) - -ENTRY(zx_secondary_startup_pa) - .word zx_secondary_startup_pa - -ENTRY(zx_suspend_iram_sz) - .word . 
- zx_resume_jump -ENDPROC(zx_secondary_startup_pa) - - -ENTRY(zx_secondary_startup) - bl v7_invalidate_l1 - b secondary_startup -ENDPROC(zx_secondary_startup) diff --git a/arch/arm/mach-zynq/headsmp.S b/arch/arm/mach-zynq/headsmp.S deleted file mode 100644 index 3449e0d1f9900db4890489dd39a77ffc05599115..0000000000000000000000000000000000000000 --- a/arch/arm/mach-zynq/headsmp.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013 Steffen Trumtrar - * Copyright (c) 2012-2013 Xilinx - */ -#include -#include -#include - - .arm - -ENTRY(zynq_secondary_trampoline) -ARM_BE8(setend be) @ ensure we are in BE8 mode - ldr r0, zynq_secondary_trampoline_jump -ARM_BE8(rev r0, r0) - bx r0 -.globl zynq_secondary_trampoline_jump -zynq_secondary_trampoline_jump: - /* Space for jumping address */ - .word 0 /* cpu 1 */ -.globl zynq_secondary_trampoline_end -zynq_secondary_trampoline_end: -ENDPROC(zynq_secondary_trampoline) diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S deleted file mode 100644 index a10bcb89594dd38ce31ca30bf97d68cf421afc42..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev4.S +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v4_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v4_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable userspace access - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #1 << 20 @ L = 1 -> write? - orreq r1, r1, #1 << 11 @ yes. - b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S deleted file mode 100644 index 14743a2f6997fcae0eeb55f53dec07b7b2ec227c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev4t.S +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v4t_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. 
- */ - .align 5 -ENTRY(v4t_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #1 << 20 @ check write - orreq r1, r1, #1 << 11 - b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S deleted file mode 100644 index 98c523118820798668bf04d065ad86ea05fb6d2c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev5t.S +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v5t_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v5t_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable user access - bic r1, r1, #1 << 11 @ clear bit 11 of FSR - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes - tst r3, #1 << 20 @ check write - orreq r1, r1, #1 << 11 - b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S deleted file mode 100644 index fec72f4fbaf508597d826e58d0dc084ee6e58dd0..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev5tj.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v5tj_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v5tj_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r5, #PSR_J_BIT @ Java? - bne do_DataAbort - do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 - ldreq r3, [r4] @ read aborted ARM instruction - uaccess_disable ip @ disable userspace access - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes - tst r3, #1 << 20 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. - b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S deleted file mode 100644 index c58bf8b43fea64f240ea66d079dc840c9c9d141f..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev6.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include "abort-macro.S" -/* - * Function: v6_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. 
Unfortunately, this does happen. We live with it. - */ - .align 5 -ENTRY(v6_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR -/* - * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. - */ -#ifdef CONFIG_ARM_ERRATA_326103 - ldr ip, =0x4107b36 - mrc p15, 0, r3, c0, c0, 0 @ get processor id - teq ip, r3, lsr #4 @ r0 ARM1136? - bne 1f - tst r5, #PSR_J_BIT @ Java? - tsteq r5, #PSR_T_BIT @ Thumb? - bne 1f - bic r1, r1, #1 << 11 @ clear bit 11 of FSR - ldr r3, [r4] @ read aborted ARM instruction - ARM_BE8(rev r3, r3) - - teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq 1f @ yes - tst r3, #1 << 20 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. -#endif -1: uaccess_disable ip @ disable userspace access - b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S deleted file mode 100644 index f7cc5d68444b56217a613bead95bf0d492fc372e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-ev7.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v7_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - */ - .align 5 -ENTRY(v7_early_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - uaccess_disable ip @ disable userspace access - - /* - * V6 code adjusts the returned DFSR. - * New designs should not need to patch up faults. - */ - -#if defined(CONFIG_VERIFY_PERMISSION_FAULT) - /* - * Detect erroneous permission failures and fix - */ - ldr r3, =0x40d @ On permission fault - and r3, r1, r3 - cmp r3, #0x0d - bne do_DataAbort - - mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR - isb - mrc p15, 0, ip, c7, c4, 0 @ Read the PAR - and r3, ip, #0x7b @ On translation fault - cmp r3, #0x0b - bne do_DataAbort - bic r1, r1, #0xf @ Fix up FSR FS[5:0] - and ip, ip, #0x7e - orr r1, r1, ip, LSR #1 -#endif - - b do_DataAbort -ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S deleted file mode 100644 index fbd60a120f6684c56c63cea10b00200765473f1d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-lv4t.S +++ /dev/null @@ -1,237 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: v4t_late_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4-r5, r9-r11, r13 preserved - * - * Purpose : obtain information about current aborted instruction. - * Note: we read user space. This means we might cause a data - * abort here if the I-TLB and D-TLB aren't seeing the same - * picture. Unfortunately, this does happen. We live with it. - */ -ENTRY(v4t_late_abort) - tst r5, #PSR_T_BIT @ check for thumb mode -#ifdef CONFIG_CPU_CP15_MMU - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR -#else - mov r0, #0 @ clear r0, r1 (no FSR/FAR) - mov r1, #0 -#endif - bne .data_thumb_abort - ldr r8, [r4] @ read arm instruction - uaccess_disable ip @ disable userspace access - tst r8, #1 << 20 @ L = 1 -> write? - orreq r1, r1, #1 << 11 @ yes. 
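The next two instructions, "and r7, r8, #15 << 24" followed by "add pc, pc, r7, lsr #22", form a classic ARM computed branch: bits [27:24] of the faulting instruction select one of the sixteen branch slots listed after them. The lsr #22 turns the nibble at bit 24 into a word offset (shift right by 24, then left by 2), and because pc reads as the address of the current instruction plus 8 in ARM state, exactly one padding nop separates the add from slot 0. The idiom as a standalone sketch, with hypothetical slot labels:

	and	r7, r8, #15 << 24	@ isolate selector bits [27:24]
	add	pc, pc, r7, lsr #22	@ advance (selector >> 24) * 4 bytes into the table
	nop				@ covers pc reading as current + 8
	b	slot0			@ selector 0 lands here
	b	slot1			@ selector 1 lands here
	@ ... one branch per selector value, sixteen slots in all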
- and r7, r8, #15 << 24 - add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine - nop - -/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm -/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm] -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m -/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] -/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm -/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] -/* 8 */ b .data_arm_ldmstm @ ldm*a rn, -/* 9 */ b .data_arm_ldmstm @ ldm*b rn, -/* a */ b .data_unknown -/* b */ b .data_unknown -/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ b do_DataAbort @ ldc rd, [rn, #m] -/* e */ b .data_unknown -/* f */ b .data_unknown - -.data_unknown_r9: - ldr r9, [sp], #4 -.data_unknown: @ Part of jumptable - mov r0, r4 - mov r1, r8 - b baddataabort - -.data_arm_ldmstm: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup - str r9, [sp, #-4]! - mov r7, #0x11 - orr r7, r7, #0x1100 - and r6, r8, r7 - and r9, r8, r7, lsl #1 - add r6, r6, r9, lsr #1 - and r9, r8, r7, lsl #2 - add r6, r6, r9, lsr #2 - and r9, r8, r7, lsl #3 - add r6, r6, r9, lsr #3 - add r6, r6, r6, lsr #8 - add r6, r6, r6, lsr #4 - and r6, r6, #15 @ r6 = no. of registers to transfer. - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsl #2 @ Undo increment - addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrhpre: - tst r8, #1 << 21 @ Check writeback bit - beq do_DataAbort @ No writeback -> no fixup -.data_arm_lateldrhpost: - str r9, [sp, #-4]! - and r9, r8, #0x00f @ get Rm / low nibble of immediate value - tst r8, #1 << 22 @ if (immediate offset) - andne r6, r8, #0xf00 @ { immediate high nibble - orrne r6, r9, r6, lsr #4 @ combine nibbles } else - ldreq r6, [r2, r9, lsl #2] @ { load Rm value } -.data_arm_apply_r6_and_rn: - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6 @ Undo increment - addeq r7, r7, r6 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrpreconst: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup -.data_arm_lateldrpostconst: - movs r6, r8, lsl #20 @ Get offset - beq do_DataAbort @ zero -> no fixup - str r9, [sp, #-4]! - and r9, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsr #20 @ Undo increment - addeq r7, r7, r6, lsr #20 @ Undo decrement - str r7, [r2, r9, lsr #14] @ Put register 'Rn' - ldr r9, [sp], #4 - b do_DataAbort - -.data_arm_lateldrprereg: - tst r8, #1 << 21 @ check writeback bit - beq do_DataAbort @ no writeback -> no fixup -.data_arm_lateldrpostreg: - and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' - str r9, [sp, #-4]! - mov r9, r8, lsr #7 @ get shift count - ands r9, r9, #31 - and r7, r8, #0x70 @ get shift type - orreq r7, r7, #8 @ shift count = 0 - add pc, pc, r7 - nop - - mov r6, r6, lsl r9 @ 0: LSL #!0 - b .data_arm_apply_r6_and_rn - b .data_arm_apply_r6_and_rn @ 1: LSL #0 - nop - b .data_unknown_r9 @ 2: MUL? - nop - b .data_unknown_r9 @ 3: MUL? 
- nop - mov r6, r6, lsr r9 @ 4: LSR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, lsr #32 @ 5: LSR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ 6: MUL? - nop - b .data_unknown_r9 @ 7: MUL? - nop - mov r6, r6, asr r9 @ 8: ASR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, asr #32 @ 9: ASR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ A: MUL? - nop - b .data_unknown_r9 @ B: MUL? - nop - mov r6, r6, ror r9 @ C: ROR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, rrx @ D: RRX - b .data_arm_apply_r6_and_rn - b .data_unknown_r9 @ E: MUL? - nop - b .data_unknown_r9 @ F: MUL? - -.data_thumb_abort: - ldrh r8, [r4] @ read instruction - uaccess_disable ip @ disable userspace access - tst r8, #1 << 11 @ L = 1 -> write? - orreq r1, r1, #1 << 8 @ yes - and r7, r8, #15 << 12 - add pc, pc, r7, lsr #10 @ lookup in table - nop - -/* 0 */ b .data_unknown -/* 1 */ b .data_unknown -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_unknown -/* 5 */ b .data_thumb_reg -/* 6 */ b do_DataAbort -/* 7 */ b do_DataAbort -/* 8 */ b do_DataAbort -/* 9 */ b do_DataAbort -/* A */ b .data_unknown -/* B */ b .data_thumb_pushpop -/* C */ b .data_thumb_ldmstm -/* D */ b .data_unknown -/* E */ b .data_unknown -/* F */ b .data_unknown - -.data_thumb_reg: - tst r8, #1 << 9 - beq do_DataAbort - tst r8, #1 << 10 @ If 'S' (signed) bit is set - movne r1, #0 @ it must be a load instr - b do_DataAbort - -.data_thumb_pushpop: - tst r8, #1 << 10 - beq .data_unknown - str r9, [sp, #-4]! - and r6, r8, #0x55 @ hweight8(r8) + R bit - and r9, r8, #0xaa - add r6, r6, r9, lsr #1 - and r9, r6, #0xcc - and r6, r6, #0x33 - add r6, r6, r9, lsr #2 - movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) - adc r6, r6, r6, lsr #4 @ high + low nibble + R bit - and r6, r6, #15 @ number of regs to transfer - ldr r7, [r2, #13 << 2] - tst r8, #1 << 11 - addeq r7, r7, r6, lsl #2 @ increment SP if PUSH - subne r7, r7, r6, lsl #2 @ decrement SP if POP - str r7, [r2, #13 << 2] - ldr r9, [sp], #4 - b do_DataAbort - -.data_thumb_ldmstm: - str r9, [sp, #-4]! - and r6, r8, #0x55 @ hweight8(r8) - and r9, r8, #0xaa - add r6, r6, r9, lsr #1 - and r9, r6, #0xcc - and r6, r6, #0x33 - add r6, r6, r9, lsr #2 - add r6, r6, r6, lsr #4 - and r9, r8, #7 << 8 - ldr r7, [r2, r9, lsr #6] - and r6, r6, #15 @ number of regs to transfer - sub r7, r7, r6, lsl #2 @ always decrement - str r7, [r2, r9, lsr #6] - ldr r9, [sp], #4 - b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S deleted file mode 100644 index bacf53fd0b70c6307e74ef8601d8dcc7db292700..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-macro.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb) - * differently than every other instruction, so it is set to 0 (write) - * even though the instructions are read instructions. This means that - * during an abort the instructions will be treated as a write and the - * handler will raise a signal from unwriteable locations if they - * fault. We have to specifically check for these instructions - * from the abort handlers to treat them properly. - * - */ - - .macro do_thumb_abort, fsr, pc, psr, tmp - tst \psr, #PSR_T_BIT - beq not_thumb - ldrh \tmp, [\pc] @ Read aborted Thumb instruction - uaccess_disable ip @ disable userspace access - and \tmp, \tmp, # 0xfe00 @ Mask opcode field - cmp \tmp, # 0x5600 @ Is it ldrsb? 
- orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes - tst \tmp, #1 << 11 @ L = 0 -> write - orreq \fsr, \fsr, #1 << 11 @ yes. - b do_DataAbort -not_thumb: - .endm - -/* - * We check for the following instruction encoding for LDRD. - * - * [27:25] == 000 - * [7:4] == 1101 - * [20] == 0 - */ - .macro teq_ldrd, tmp, insn - mov \tmp, #0x0e100000 - orr \tmp, #0x000000f0 - and \tmp, \insn, \tmp - teq \tmp, #0x000000d0 - .endm diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S deleted file mode 100644 index 6e2366a263219b379f4fdb43cc8e5413fb36e52a..0000000000000000000000000000000000000000 --- a/arch/arm/mm/abort-nommu.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -/* - * Function: nommu_early_abort - * - * Params : r2 = pt_regs - * : r4 = aborted context pc - * : r5 = aborted context psr - * - * Returns : r4 - r11, r13 preserved - * - * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. - * Just fill zero into the registers. - */ - .align 5 -ENTRY(nommu_early_abort) - mov r0, #0 @ clear r0, r1 (no FSR/FAR) - mov r1, #0 - b do_DataAbort -ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S deleted file mode 100644 index 3a464d1649b4b25b8e718be84d1d42d7093646e7..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-fa.S +++ /dev/null @@ -1,247 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-fa.S - * - * Copyright (C) 2005 Faraday Corp. - * Copyright (C) 2008-2009 Paulius Zaleckas - * - * Based on cache-v4wb.S: - * Copyright (C) 1997-2002 Russell king - * - * Processors: FA520 FA526 FA626 - */ -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 16 - -/* - * The total size of the data cache. - */ -#ifdef CONFIG_ARCH_GEMINI -#define CACHE_DSIZE 8192 -#else -#define CACHE_DSIZE 16384 -#endif - -/* FIXME: put optimal value here. Current one is just estimation */ -#define CACHE_DLIMIT (CACHE_DSIZE * 2) - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(fa_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(fa_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular address - * space. - */ -ENTRY(fa_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(fa_flush_kern_cache_all) - mov ip, #0 - mov r2, #VM_EXEC -__flush_whole_cache: - mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(fa_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT @ total size >= limit? 
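The CACHE_DLIMIT comparison just above is a cost cutoff: the per-line loop that follows issues one maintenance operation per CACHE_DLINESIZE (16-byte) chunk, so once a range reaches CACHE_DLIMIT (twice the data cache size) the loop would touch at least twice as many lines as the cache even holds, and a single whole-cache clean/invalidate wins. Worked through for the two configurations defined earlier, as a rough check on the threshold (the file's own FIXME notes that the factor of two is only an estimate):

	@ CONFIG_ARCH_GEMINI: CACHE_DSIZE = 8192  -> CACHE_DLIMIT = 16384 -> 16384 / 16 = 1024 line operations
	@ otherwise:          CACHE_DSIZE = 16384 -> CACHE_DLIMIT = 32768 -> 32768 / 16 = 2048 line operations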
- bhs __flush_whole_cache @ flush whole D cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(fa_coherent_kern_range) - /* fall through */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(fa_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. - * - * - addr - kernel address - * - size - size of region - */ -ENTRY(fa_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -fa_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry - tst r1, #CACHE_DLINESIZE - 1 - bic r1, r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -fa_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(fa_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(fa_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq fa_dma_clean_range - bcs fa_dma_inv_range - b fa_dma_flush_range -ENDPROC(fa_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(fa_dma_unmap_area) - ret lr -ENDPROC(fa_dma_unmap_area) - - .globl fa_flush_kern_cache_louis - .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions fa diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S deleted file mode 100644 index 72d939ef87985ca40cc3a27e47360dac52daa6c2..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-nop.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -#include -#include -#include - -#include "proc-macros.S" - -ENTRY(nop_flush_icache_all) - ret lr -ENDPROC(nop_flush_icache_all) - - .globl nop_flush_kern_cache_all - .equ nop_flush_kern_cache_all, nop_flush_icache_all - - .globl nop_flush_kern_cache_louis - .equ nop_flush_kern_cache_louis, nop_flush_icache_all - - .globl nop_flush_user_cache_all - .equ nop_flush_user_cache_all, nop_flush_icache_all - - .globl nop_flush_user_cache_range - .equ nop_flush_user_cache_range, nop_flush_icache_all - - .globl nop_coherent_kern_range - .equ nop_coherent_kern_range, nop_flush_icache_all - -ENTRY(nop_coherent_user_range) - mov r0, 0 - ret lr -ENDPROC(nop_coherent_user_range) - - .globl nop_flush_kern_dcache_area - .equ nop_flush_kern_dcache_area, nop_flush_icache_all - - .globl nop_dma_flush_range - .equ nop_dma_flush_range, nop_flush_icache_all - - .globl nop_dma_map_area - .equ nop_dma_map_area, nop_flush_icache_all - - .globl nop_dma_unmap_area - .equ nop_dma_unmap_area, nop_flush_icache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions nop diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S deleted file mode 100644 index 7787057e4990fbba9005999ea59da0f4b912c838..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4.S +++ /dev/null @@ -1,147 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4.S - * - * Copyright (C) 1997-2002 Russell king - */ -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4_flush_icache_all) - ret lr -ENDPROC(v4_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
- * - * - mm - mm_struct describing address space - */ -ENTRY(v4_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4_flush_kern_cache_all) -#ifdef CONFIG_CPU_CP15 - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ flush ID cache - ret lr -#else - /* FALLTHROUGH */ -#endif - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4_flush_user_cache_range) -#ifdef CONFIG_CPU_CP15 - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ flush ID cache - ret lr -#else - /* FALLTHROUGH */ -#endif - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_coherent_user_range) - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_dma_flush_range) -#ifdef CONFIG_CPU_CP15 - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ flush ID cache -#endif - ret lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v4_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4_dma_map_area) - ret lr -ENDPROC(v4_dma_unmap_area) -ENDPROC(v4_dma_map_area) - - .globl v4_flush_kern_cache_louis - .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S deleted file mode 100644 index 905ac2fa2b1ea27b918f1d3c06960425403ac8fb..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4wb.S +++ /dev/null @@ -1,259 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4wb.S - * - * Copyright (C) 1997-2002 Russell king - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The total size of the data cache. 
- */ -#if defined(CONFIG_CPU_SA110) -# define CACHE_DSIZE 16384 -#elif defined(CONFIG_CPU_SA1100) -# define CACHE_DSIZE 8192 -#else -# error Unknown cache size -#endif - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - * - * Size Clean (ticks) Dirty (ticks) - * 4096 21 20 21 53 55 54 - * 8192 40 41 40 106 100 102 - * 16384 77 77 76 140 140 138 - * 32768 150 149 150 214 216 212 <--- - * 65536 296 297 296 351 358 361 - * 131072 591 591 591 656 657 651 - * Whole 132 136 132 221 217 207 <--- - */ -#define CACHE_DLIMIT (CACHE_DSIZE * 4) - - .data - .align 2 -flush_base: - .long FLUSH_BASE - .text - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4wb_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(v4wb_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular address - * space. - */ -ENTRY(v4wb_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4wb_flush_kern_cache_all) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache -__flush_whole_cache: - ldr r3, =flush_base - ldr r1, [r3, #0] - eor r1, r1, #CACHE_DSIZE - str r1, [r3, #0] - add r2, r1, #CACHE_DSIZE -1: ldr r3, [r1], #32 - cmp r1, r2 - blo 1b -#ifdef FLUSH_BASE_MINICACHE - add r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE - sub r1, r2, #512 @ only 512 bytes -1: ldr r3, [r1], #32 - cmp r1, r2 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4wb_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - tst r2, #VM_EXEC @ executable region? - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - - cmp r3, #CACHE_DLIMIT @ total size >= limit? - bhs __flush_whole_cache @ flush whole D cache - -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4wb_flush_kern_dcache_area) - add r1, r0, r1 - /* fall through */ - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wb_coherent_kern_range) - /* fall through */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wb_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wb_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wb_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * This is actually the same as v4wb_coherent_kern_range() - */ - .globl v4wb_dma_flush_range - .set v4wb_dma_flush_range, v4wb_coherent_kern_range - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wb_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq v4wb_dma_clean_range - bcs v4wb_dma_inv_range - b v4wb_dma_flush_range -ENDPROC(v4wb_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wb_dma_unmap_area) - ret lr -ENDPROC(v4wb_dma_unmap_area) - - .globl v4wb_flush_kern_cache_louis - .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S deleted file mode 100644 index 0b290c25a99dd6c522d9371e0b542bc054894e5f..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v4wt.S +++ /dev/null @@ -1,203 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v4wt.S - * - * Copyright (C) 1997-2002 Russell king - * - * ARMv4 write through cache operations support. - * - * We assume that the write buffer is not enabled. - */ -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 8 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. 
- * - * *** This needs benchmarking - */ -#define CACHE_DLIMIT 16384 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v4wt_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(v4wt_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(v4wt_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v4wt_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive, page aligned) - * - end - end address (exclusive, page aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v4wt_flush_user_cache_range) - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v4wt_flush_kern_dcache_area) - mov r2, #0 - mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache - add r1, r0, r1 - /* fallthrough */ - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -v4wt_dma_inv_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ - .globl v4wt_dma_flush_range - .equ v4wt_dma_flush_range, v4wt_dma_inv_range - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wt_dma_unmap_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v4wt_dma_inv_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v4wt_dma_map_area) - ret lr -ENDPROC(v4wt_dma_unmap_area) -ENDPROC(v4wt_dma_map_area) - - .globl v4wt_flush_kern_cache_louis - .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S deleted file mode 100644 index f0f65eb073e481e082e216f846c7aebf20d0e279..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v6.S +++ /dev/null @@ -1,332 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v6.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv6 processor support. - */ -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define HARVARD_CACHE -#define CACHE_LINE_SIZE 32 -#define D_CACHE_LINE_SIZE 32 -#define BTB_FLUSH_SIZE 8 - -/* - * v6_flush_icache_all() - * - * Flush the whole I-cache. - * - * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. - * This erratum is present in 1136, 1156 and 1176. It does not affect the - * MPCore. - * - * Registers: - * r0 - set to 0 - * r1 - corrupted - */ -ENTRY(v6_flush_icache_all) - mov r0, #0 -#ifdef CONFIG_ARM_ERRATA_411920 - mrs r1, cpsr - cpsid ifa @ disable interrupts - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache - msr cpsr_cx, r1 @ restore interrupts - .rept 11 @ ARM Ltd recommends at least - nop @ 11 NOPs - .endr -#else - mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache -#endif - ret lr -ENDPROC(v6_flush_icache_all) - -/* - * v6_flush_kern_cache_all() - * - * Flush the entire cache. - */ -ENTRY(v6_flush_kern_cache_all) - mov r0, #0 -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate -#ifndef CONFIG_ARM_ERRATA_411920 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate -#else - b v6_flush_icache_all -#endif -#else - mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate -#endif - ret lr - -/* - * v6_flush_user_cache_all() - * - * Flush all cache entries in a particular address space - * - * - mm - mm_struct describing address space - */ -ENTRY(v6_flush_user_cache_all) - /*FALLTHROUGH*/ - -/* - * v6_flush_user_cache_range(start, end, flags) - * - * Flush a range of cache entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vm_area_struct flags describing address space - * - * It is assumed that: - * - we have a VIPT cache. - */ -ENTRY(v6_flush_user_cache_range) - ret lr - -/* - * v6_coherent_kern_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. 
This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v6_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * v6_coherent_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v6_coherent_user_range) - UNWIND(.fnstart ) -#ifdef HARVARD_CACHE - bic r0, r0, #CACHE_LINE_SIZE - 1 -1: - USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line - add r0, r0, #CACHE_LINE_SIZE - cmp r0, r1 - blo 1b -#endif - mov r0, #0 -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer -#ifndef CONFIG_ARM_ERRATA_411920 - mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate -#else - b v6_flush_icache_all -#endif -#else - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB -#endif - ret lr - -/* - * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, fail with -EFAULT. - */ -9001: - mov r0, #-EFAULT - ret lr - UNWIND(.fnend ) -ENDPROC(v6_coherent_user_range) -ENDPROC(v6_coherent_kern_range) - -/* - * v6_flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v6_flush_kern_dcache_area) - add r1, r0, r1 - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line -#else - mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 - blo 1b -#ifdef HARVARD_CACHE - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 -#endif - ret lr - - -/* - * v6_dma_inv_range(start,end) - * - * Invalidate the data cache within the specified region; we will - * be performing a DMA operation in this region and we want to - * purge old data in the cache. 
- * - * - start - virtual start address of region - * - end - virtual end address of region - */ -v6_dma_inv_range: -#ifdef CONFIG_DMA_CACHE_RWFO - ldrb r2, [r0] @ read for ownership - strb r2, [r0] @ write for ownership -#endif - tst r0, #D_CACHE_LINE_SIZE - 1 - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -#ifdef HARVARD_CACHE - mcrne p15, 0, r0, c7, c10, 1 @ clean D line -#else - mcrne p15, 0, r0, c7, c11, 1 @ clean unified line -#endif - tst r1, #D_CACHE_LINE_SIZE - 1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrbne r2, [r1, #-1] @ read for ownership - strbne r2, [r1, #-1] @ write for ownership -#endif - bic r1, r1, #D_CACHE_LINE_SIZE - 1 -#ifdef HARVARD_CACHE - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line -#else - mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line -#endif -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D line -#else - mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrlo r2, [r0] @ read for ownership - strlo r2, [r0] @ write for ownership -#endif - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * v6_dma_clean_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -v6_dma_clean_range: - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership -#endif -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c10, 1 @ clean D line -#else - mcr p15, 0, r0, c7, c11, 1 @ clean unified line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * v6_dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(v6_dma_flush_range) -#ifdef CONFIG_DMA_CACHE_RWFO - ldrb r2, [r0] @ read for ownership - strb r2, [r0] @ write for ownership -#endif - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line -#else - mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line -#endif - add r0, r0, #D_CACHE_LINE_SIZE - cmp r0, r1 -#ifdef CONFIG_DMA_CACHE_RWFO - ldrblo r2, [r0] @ read for ownership - strblo r2, [r0] @ write for ownership -#endif - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v6_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_FROM_DEVICE - beq v6_dma_inv_range -#ifndef CONFIG_DMA_CACHE_RWFO - b v6_dma_clean_range -#else - teq r2, #DMA_TO_DEVICE - beq v6_dma_clean_range - b v6_dma_flush_range -#endif -ENDPROC(v6_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v6_dma_unmap_area) -#ifndef CONFIG_DMA_CACHE_RWFO - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v6_dma_inv_range -#endif - ret lr -ENDPROC(v6_dma_unmap_area) - - .globl v6_flush_kern_cache_louis - .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S deleted file mode 100644 index 0ee8fc4b4672c6d2123c231a47b61cb870ec6052..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v7.S +++ /dev/null @@ -1,484 +0,0 
@@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2005 ARM Ltd. - * - * This is the "shell" of the ARMv7 processor support. - */ -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND -.globl icache_size - .data - .align 2 -icache_size: - .long 64 - .text -#endif -/* - * The secondary kernel init calls v7_flush_dcache_all before it enables - * the L1; however, the L1 comes out of reset in an undefined state, so - * the clean + invalidate performed by v7_flush_dcache_all causes a bunch - * of cache lines with uninitialized data and uninitialized tags to get - * written out to memory, which does really unpleasant things to the main - * processor. We fix this by performing an invalidate, rather than a - * clean + invalidate, before jumping into the kernel. - * - * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs - * to be called for both secondary cores startup and primary core resume - * procedures. - */ -ENTRY(v7_invalidate_l1) - mov r0, #0 - mcr p15, 2, r0, c0, c0, 0 - mrc p15, 1, r0, c0, c0, 0 - - movw r1, #0x7fff - and r2, r1, r0, lsr #13 - - movw r1, #0x3ff - - and r3, r1, r0, lsr #3 @ NumWays - 1 - add r2, r2, #1 @ NumSets - - and r0, r0, #0x7 - add r0, r0, #4 @ SetShift - - clz r1, r3 @ WayShift - add r4, r3, #1 @ NumWays -1: sub r2, r2, #1 @ NumSets-- - mov r3, r4 @ Temp = NumWays -2: subs r3, r3, #1 @ Temp-- - mov r5, r3, lsl r1 - mov r6, r2, lsl r0 - orr r5, r5, r6 @ Reg = (Temp<> 4) @ ID of ARM Cortex A9 r0p? - movt r1, #:upper16:(0x410fc090 >> 4) - teq r1, r2, lsr #4 @ test for errata affected core and if so... - moveq r3, #1 << 1 @ fix LoUIS value - beq start_flush_levels @ start flushing cache levels -#endif - ret lr -ENDPROC(v7_flush_dcache_louis) - -/* - * v7_flush_dcache_all() - * - * Flush the whole D-cache. 
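The set/way walk that follows is the standard ARMv7 algorithm: decode line size, way count and set count from CCSIDR, then issue one clean+invalidate per (set, way) pair with the fields packed into the operand register. A C sketch for a single, already selected cache level; the accessors are hypothetical, and it assumes a set-associative level since __builtin_clz(0) is undefined in C (the assembly's clz handles that case naturally):

    #include <stdint.h>

    extern uint32_t read_ccsidr(void);        /* hypothetical CP15 accessor  */
    extern void dccisw(uint32_t setway);      /* clean+invalidate by set/way */

    /* One level of the v7_flush_dcache_all walk; 'level' is the 0-based
     * cache level already written to CSSELR. */
    static void flush_level_by_set_way(uint32_t level)
    {
        uint32_t ccsidr    = read_ccsidr();
        uint32_t set_shift = (ccsidr & 0x7) + 4;        /* log2(line bytes)   */
        uint32_t max_way   = (ccsidr >> 3) & 0x3ff;     /* NumWays - 1        */
        uint32_t max_set   = (ccsidr >> 13) & 0x7fff;   /* NumSets - 1        */
        uint32_t way_shift = __builtin_clz(max_way);    /* the clz in the asm */

        for (uint32_t set = 0; set <= max_set; set++)
            for (uint32_t way = 0; way <= max_way; way++)
                dccisw((way << way_shift) | (set << set_shift) | (level << 1));
    }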
- * - * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) - * - * - mm - mm_struct describing address space - */ -ENTRY(v7_flush_dcache_all) - dmb @ ensure ordering with previous memory accesses - mrc p15, 1, r0, c0, c0, 1 @ read clidr - mov r3, r0, lsr #23 @ move LoC into position - ands r3, r3, #7 << 1 @ extract LoC*2 from clidr - beq finished @ if loc is 0, then no need to clean -start_flush_levels: - mov r10, #0 @ start clean at cache level 0 -flush_levels: - add r2, r10, r10, lsr #1 @ work out 3x current cache level - mov r1, r0, lsr r2 @ extract cache type bits from clidr - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache -#ifdef CONFIG_PREEMPT - save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic -#endif - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - isb @ isb to sych the new cssr&csidr - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr -#ifdef CONFIG_PREEMPT - restore_irqs_notrace r9 -#endif - and r2, r1, #7 @ extract the length of the cache lines - add r2, r2, #4 @ add 4 (line length offset) - movw r4, #0x3ff - ands r4, r4, r1, lsr #3 @ find maximum number on the way size - clz r5, r4 @ find bit position of way size increment - movw r7, #0x7fff - ands r7, r7, r1, lsr #13 @ extract max number of the index size -loop1: - mov r9, r7 @ create working copy of max index -loop2: - ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r4, r5 ) - THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r9, r2 ) - THUMB( orr r11, r11, r6 ) @ factor index number into r11 - mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the index - bge loop2 - subs r4, r4, #1 @ decrement the way - bge loop1 -skip: - add r10, r10, #2 @ increment cache number - cmp r3, r10 -#ifdef CONFIG_ARM_ERRATA_814220 - dsb -#endif - bgt flush_levels -finished: - mov r10, #0 @ switch back to cache level 0 - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - dsb st - isb - ret lr -ENDPROC(v7_flush_dcache_all) - -/* - * v7_flush_cache_all() - * - * Flush the entire cache system. - * The data cache flush is now achieved using atomic clean / invalidates - * working outwards from L1 cache. This is done using Set/Way based cache - * maintenance instructions. - * The instruction cache can still be invalidated back to the point of - * unification in a single instruction. - * - */ -ENTRY(v7_flush_kern_cache_all) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) - bl v7_flush_dcache_all - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - ret lr -ENDPROC(v7_flush_kern_cache_all) - - /* - * v7_flush_kern_cache_louis(void) - * - * Flush the data cache up to Level of Unification Inner Shareable. - * Invalidate the I-cache to the point of unification. 
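The "louis" variant differs from the full flush only in its stopping level: both fall into the same start_flush_levels loop, but one walks down to the Level of Coherency while the other stops at the Level of Unification Inner Shareable. A sketch of the CLIDR field extraction, with the accessor hypothetical and the bit positions as given in the ARMv7 ARM:

    #include <stdint.h>

    extern uint32_t read_clidr(void);   /* hypothetical: mrc p15, 1, <rt>, c0, c0, 1 */

    static uint32_t flush_limit(int whole_hierarchy)
    {
        uint32_t clidr = read_clidr();

        return whole_hierarchy ? (clidr >> 24) & 0x7    /* LoC,   CLIDR[26:24] */
                               : (clidr >> 21) & 0x7;   /* LoUIS, CLIDR[23:21] */
    }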
- */ -ENTRY(v7_flush_kern_cache_louis) - ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) - bl v7_flush_dcache_louis - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) - THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - ret lr -ENDPROC(v7_flush_kern_cache_louis) - -/* - * v7_flush_cache_all() - * - * Flush all TLB entries in a particular address space - * - * - mm - mm_struct describing address space - */ -ENTRY(v7_flush_user_cache_all) - /*FALLTHROUGH*/ - -/* - * v7_flush_cache_range(start, end, flags) - * - * Flush a range of TLB entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vm_area_struct flags describing address space - * - * It is assumed that: - * - we have a VIPT cache. - */ -ENTRY(v7_flush_user_cache_range) - ret lr -ENDPROC(v7_flush_user_cache_all) -ENDPROC(v7_flush_user_cache_range) - -/* - * v7_coherent_kern_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v7_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * v7_coherent_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified - * region. This is typically used when code has been written to - * a memory region, and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - * - * It is assumed that: - * - the Icache does not read data from the write buffer - */ -ENTRY(v7_coherent_user_range) - UNWIND(.fnstart ) - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r12, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification - add r12, r12, r2 - cmp r12, r1 - blo 1b - dsb ishst -#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND - ldr r3, =icache_size - ldr r2, [r3, #0] -#else - icache_line_size r2, r3 -#endif - sub r3, r2, #1 - bic r12, r0, r3 -2: - USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line - add r12, r12, r2 - cmp r12, r1 - blo 2b - mov r0, #0 - ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable - ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB - dsb ishst - isb - ret lr - -/* - * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, fail with -EFAULT. - */ -9001: -#ifdef CONFIG_ARM_ERRATA_775420 - dsb -#endif - mov r0, #-EFAULT - ret lr - UNWIND(.fnend ) -ENDPROC(v7_coherent_kern_range) -ENDPROC(v7_coherent_user_range) - -/* - * v7_flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. 
- * - * - addr - kernel address - * - size - region size - */ -ENTRY(v7_flush_kern_dcache_area) - dcache_line_size r2, r3 - add r1, r0, r1 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_flush_kern_dcache_area) - -/* - * v7_dma_inv_range(start,end) - * - * Invalidate the data cache within the specified region; we will - * be performing a DMA operation in this region and we want to - * purge old data in the cache. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -v7_dma_inv_range: - dcache_line_size r2, r3 - sub r3, r2, #1 - tst r0, r3 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif - mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line - addne r0, r0, r2 - - tst r1, r3 - bic r1, r1, r3 - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line - cmp r0, r1 -1: - mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line - addlo r0, r0, r2 - cmplo r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_inv_range) - -/* - * v7_dma_clean_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -v7_dma_clean_range: - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c10, 1 @ clean D / U line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_clean_range) - -/* - * v7_dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(v7_dma_flush_range) - dcache_line_size r2, r3 - sub r3, r2, #1 - bic r0, r0, r3 -#ifdef CONFIG_ARM_ERRATA_764369 - ALT_SMP(W(dsb)) - ALT_UP(W(nop)) -#endif -1: - mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line - add r0, r0, r2 - cmp r0, r1 - blo 1b - dsb st - ret lr -ENDPROC(v7_dma_flush_range) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v7_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_FROM_DEVICE - beq v7_dma_inv_range - b v7_dma_clean_range -ENDPROC(v7_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v7_dma_unmap_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - bne v7_dma_inv_range - ret lr -ENDPROC(v7_dma_unmap_area) - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v7 - - /* The Broadcom Brahma-B15 read-ahead cache requires some modifications - * to the v7_cache_fns, we only override the ones we need - */ -#ifndef CONFIG_CACHE_B15_RAC - globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all -#endif - globl_equ b15_flush_icache_all, v7_flush_icache_all - globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis - globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all - globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range - globl_equ b15_coherent_kern_range, v7_coherent_kern_range - globl_equ b15_coherent_user_range, v7_coherent_user_range - globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area - - globl_equ b15_dma_map_area, v7_dma_map_area - globl_equ b15_dma_unmap_area, v7_dma_unmap_area - globl_equ 
b15_dma_flush_range, v7_dma_flush_range - - define_cache_functions b15 diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S deleted file mode 100644 index a0035c426ce635b3ceacc647d712aabb61d0e63e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/cache-v7m.S +++ /dev/null @@ -1,454 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/cache-v7m.S - * - * Based on linux/arch/arm/mm/cache-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2005 ARM Ltd. - * - * This is the "shell" of the ARMv7M processor support. - */ -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* Generic V7M read/write macros for memory mapped cache operations */ -.macro v7m_cache_read, rt, reg - movw \rt, #:lower16:BASEADDR_V7M_SCB + \reg - movt \rt, #:upper16:BASEADDR_V7M_SCB + \reg - ldr \rt, [\rt] -.endm - -.macro v7m_cacheop, rt, tmp, op, c = al - movw\c \tmp, #:lower16:BASEADDR_V7M_SCB + \op - movt\c \tmp, #:upper16:BASEADDR_V7M_SCB + \op - str\c \rt, [\tmp] -.endm - - -.macro read_ccsidr, rt - v7m_cache_read \rt, V7M_SCB_CCSIDR -.endm - -.macro read_clidr, rt - v7m_cache_read \rt, V7M_SCB_CLIDR -.endm - -.macro write_csselr, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR -.endm - -/* - * dcisw: Invalidate data cache by set/way - */ -.macro dcisw, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCISW -.endm - -/* - * dccisw: Clean and invalidate data cache by set/way - */ -.macro dccisw, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW -.endm - -/* - * dccimvac: Clean and invalidate data cache line by MVA to PoC. - */ -.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo -.macro dccimvac\c, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c -.endm -.endr - -/* - * dcimvac: Invalidate data cache line by MVA to PoC - */ -.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo -.macro dcimvac\c, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c -.endm -.endr - -/* - * dccmvau: Clean data cache line by MVA to PoU - */ -.macro dccmvau, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU -.endm - -/* - * dccmvac: Clean data cache line by MVA to PoC - */ -.macro dccmvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC -.endm - -/* - * icimvau: Invalidate instruction caches by MVA to PoU - */ -.macro icimvau, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU -.endm - -/* - * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP. - * rt data ignored by ICIALLU(IS), so can be used for the address - */ -.macro invalidate_icache, rt - v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU - mov \rt, #0 -.endm - -/* - * Invalidate the BTB, inner shareable if SMP. 
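Unlike the CP15-based ARMv7-A code above, ARMv7-M drives every cache operation through memory-mapped System Control Block registers, which is exactly what the v7m_cacheop macro builds. A C sketch of the same idea; the SCB register addresses below are quoted from memory of the ARMv7-M ARM and should be treated as an assumption:

    #include <stdint.h>

    /* Assumed SCB cache-maintenance register addresses (ARMv7-M ARM). */
    #define V7M_SCB_ICIALLU  (*(volatile uint32_t *)0xE000EF50) /* I-cache invalidate all */
    #define V7M_SCB_DCCMVAC  (*(volatile uint32_t *)0xE000EF68) /* D clean by MVA to PoC  */

    static inline void v7m_clean_dcache_line(uint32_t mva)
    {
        V7M_SCB_DCCMVAC = mva;      /* the str \rt, [\tmp] in v7m_cacheop */
    }

    static inline void v7m_invalidate_icache(void)
    {
        V7M_SCB_ICIALLU = 0;        /* written data is ignored */
    }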
- * rt data ignored by BPIALL, so it can be used for the address - */ -.macro invalidate_bp, rt - v7m_cacheop \rt, \rt, V7M_SCB_BPIALL - mov \rt, #0 -.endm - -ENTRY(v7m_invalidate_l1) - mov r0, #0 - - write_csselr r0, r1 - read_ccsidr r0 - - movw r1, #0x7fff - and r2, r1, r0, lsr #13 - - movw r1, #0x3ff - - and r3, r1, r0, lsr #3 @ NumWays - 1 - add r2, r2, #1 @ NumSets - - and r0, r0, #0x7 - add r0, r0, #4 @ SetShift - - clz r1, r3 @ WayShift - add r4, r3, #1 @ NumWays -1: sub r2, r2, #1 @ NumSets-- - mov r3, r4 @ Temp = NumWays -2: subs r3, r3, #1 @ Temp-- - mov r5, r3, lsl r1 - mov r6, r2, lsl r0 - orr r5, r5, r6 @ Reg = (Temp< and proc-macros.S) - define_cache_functions v7m diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S deleted file mode 100644 index fc01f1b18523653a4378421b6f89dd877bbb53fd..0000000000000000000000000000000000000000 --- a/arch/arm/mm/l2c-l2x0-resume.S +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * L2C-310 early resume code. This can be used by platforms to restore - * the settings of their L2 cache controller before restoring the - * processor state. - * - * This code can only be used to if you are running in the secure world. - */ -#include -#include -#include - - .text - -ENTRY(l2c310_early_resume) - adr r0, 1f - ldr r2, [r0] - add r0, r2, r0 - - ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} - @ r1 = phys address of L2C-310 controller - @ r2 = aux_ctrl - @ r3 = tag_latency - @ r4 = data_latency - @ r5 = filter_start - @ r6 = filter_end - @ r7 = prefetch_ctrl - @ r8 = pwr_ctrl - - @ Check that the address has been initialised - teq r1, #0 - reteq lr - - @ The prefetch and power control registers are revision dependent - @ and can be written whether or not the L2 cache is enabled - ldr r0, [r1, #L2X0_CACHE_ID] - and r0, r0, #L2X0_CACHE_ID_RTL_MASK - cmp r0, #L310_CACHE_ID_RTL_R2P0 - strcs r7, [r1, #L310_PREFETCH_CTRL] - cmp r0, #L310_CACHE_ID_RTL_R3P0 - strcs r8, [r1, #L310_POWER_CTRL] - - @ Don't setup the L2 cache if it is already enabled - ldr r0, [r1, #L2X0_CTRL] - tst r0, #L2X0_CTRL_EN - retne lr - - str r3, [r1, #L310_TAG_LATENCY_CTRL] - str r4, [r1, #L310_DATA_LATENCY_CTRL] - str r6, [r1, #L310_ADDR_FILTER_END] - str r5, [r1, #L310_ADDR_FILTER_START] - - str r2, [r1, #L2X0_AUX_CTRL] - mov r9, #L2X0_CTRL_EN - str r9, [r1, #L2X0_CTRL] - ret lr -ENDPROC(l2c310_early_resume) - - .align -1: .long l2x0_saved_regs - . diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S deleted file mode 100644 index b2ffce4201062e3ec2045364ddc454cf706bab8d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-legacy.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: legacy_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. 
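All three prefetch-abort stubs in the files that follow funnel into the same C handler; they differ only in where the fault address and status come from. A sketch of the calling convention (do_PrefetchAbort is the kernel's real C entry point in arch/arm/mm/fault.c; the wrapper shown is illustrative only):

    struct pt_regs;   /* kernel register-frame type, opaque here */

    extern void do_PrefetchAbort(unsigned long addr, unsigned int ifsr,
                                 struct pt_regs *regs);

    /* Pre-v6 cores have no IFSR register to read, so legacy_pabort reports
     * a fixed status of 5 (section translation fault); v6 reads IFSR from
     * c5, c0, 1, and v7 additionally reads the address from IFAR. */
    static void legacy_pabort_sketch(unsigned long aborted_pc, struct pt_regs *regs)
    {
        do_PrefetchAbort(aborted_pc, 5, regs);
    }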
- */ - - .align 5 -ENTRY(legacy_pabort) - mov r0, r4 - mov r1, #5 - b do_PrefetchAbort -ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S deleted file mode 100644 index 8686265dc9418b29381942bfd87a937a3234d46e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-v6.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: v6_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. - */ - - .align 5 -ENTRY(v6_pabort) - mov r0, r4 - mrc p15, 0, r1, c5, c0, 1 @ get IFSR - b do_PrefetchAbort -ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S deleted file mode 100644 index 9c70b1a21dc9204f24524df9905fbc077a82f2dc..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pabort-v7.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -/* - * Function: v7_pabort - * - * Params : r2 = pt_regs - * : r4 = address of aborted instruction - * : r5 = psr for parent context - * - * Returns : r4 - r11, r13 preserved - * - * Purpose : obtain information about current prefetch abort. - */ - - .align 5 -ENTRY(v7_pabort) - mrc p15, 0, r0, c6, c0, 2 @ get IFAR - mrc p15, 0, r1, c5, c0, 1 @ get IFSR - b do_PrefetchAbort -ENDPROC(v7_pabort) diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S deleted file mode 100644 index 4fa5371bc6624ce63be9963edd268280662d1566..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1020.S +++ /dev/null @@ -1,515 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020 - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm1020. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1020_proc_init() - */ -ENTRY(cpu_arm1020_proc_init) - ret lr - -/* - * cpu_arm1020_proc_fin() - */ -ENTRY(cpu_arm1020_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1020_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
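CACHE_DLIMIT, defined above, drives a cost cutoff in the range-flush entry points that follow: past 32 KiB it is cheaper to clean the whole D-cache by set/way than to walk the range line by line. A minimal C sketch of that check, with hypothetical helpers (MVA-based cache ops ignore the low address bits, so no explicit alignment is needed, matching the assembly):

    #define CACHE_DLIMIT     32768
    #define CACHE_DLINESIZE  32

    extern void flush_whole_dcache(void);                        /* hypothetical */
    extern void clean_and_invalidate_dcache_line(unsigned long); /* hypothetical */

    static void flush_user_range_sketch(unsigned long start, unsigned long end)
    {
        if (end - start >= CACHE_DLIMIT) {      /* the bhs __flush_whole_cache */
            flush_whole_dcache();
            return;
        }
        for (unsigned long mva = start; mva < end; mva += CACHE_DLINESIZE)
            clean_and_invalidate_dcache_line(mva);
    }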
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1020_reset) - .popsection - -/* - * cpu_arm1020_do_idle() - */ - .align 5 -ENTRY(cpu_arm1020_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1020_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1020_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1020_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1020_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - mcr p15, 0, ip, c7, c10, 4 @ drain WB - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1020_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_coherent_kern_range) - /* FALLTRHOUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcr p15, 0, ip, c7, c10, 4 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1020_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, ip, c7, c10, 4 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, ip, c7, c10, 4 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - mcrne p15, 0, ip, c7, c10, 4 @ drain WB -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
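The map-area entry point that follows these three routines picks among them by DMA direction with a cmp/beq/bcs chain. The same dispatch in C, using the kernel's dma_data_direction values:

    enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1,
                              DMA_FROM_DEVICE = 2 };   /* kernel values */

    extern void dma_clean_range(unsigned long start, unsigned long end);
    extern void dma_inv_range(unsigned long start, unsigned long end);
    extern void dma_flush_range(unsigned long start, unsigned long end);

    static void dma_map_area_sketch(unsigned long start, unsigned long size,
                                    enum dma_data_direction dir)
    {
        unsigned long end = start + size;

        switch (dir) {
        case DMA_TO_DEVICE:      /* CPU wrote, device reads: push dirty lines */
            dma_clean_range(start, end);
            break;
        case DMA_FROM_DEVICE:    /* device writes: discard stale CPU copies */
            dma_inv_range(start, end);
            break;
        default:                 /* DMA_BIDIRECTIONAL: clean and invalidate */
            dma_flush_range(start, end);
            break;
        }
    }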
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 - mcr p15, 0, ip, c7, c10, 4 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1020_dma_clean_range - bcs arm1020_dma_inv_range - b arm1020_dma_flush_range -ENDPROC(arm1020_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020_dma_unmap_area) - ret lr -ENDPROC(arm1020_dma_unmap_area) - - .globl arm1020_flush_kern_cache_louis - .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1020 - - .align 5 -ENTRY(cpu_arm1020_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, ip, c7, c10, 4 @ drain WB - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1020_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1020_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r3, c7, c10, 4 - mov r1, #0xF @ 16 segments -1: mov r3, #0x3F @ 64 entries -2: mov ip, r3, LSL #26 @ shift up entry - orr ip, ip, r1, LSL #5 @ shift in/up index - mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry - mov ip, #0 - mcr p15, 0, ip, c7, c10, 4 - subs r3, r3, #1 - cmp r3, #0 - bge 2b @ entries 3F to 0 - subs r1, r1, #1 - cmp r1, #0 - bge 1b @ segments 15 to 0 - -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif /* CONFIG_MMU */ - ret lr - -/* - * cpu_arm1020_set_pte(ptep, pte) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1020_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 4 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1020_setup, #function -__arm1020_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, arm1020_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1020_setup, . 
- __arm1020_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - */ - .type arm1020_crval, #object -arm1020_crval: - crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort - - - .section ".rodata" - - string cpu_arch_name, "armv5t" - string cpu_elf_name, "v5" - - .type cpu_arm1020_name, #object -cpu_arm1020_name: - .ascii "ARM1020" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif -#ifndef CONFIG_CPU_BPREDICT_DISABLE - .ascii "B" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif - .ascii "\0" - .size cpu_arm1020_name, . - cpu_arm1020_name - - .align - - .section ".proc.info.init", #alloc - - .type __arm1020_proc_info,#object -__arm1020_proc_info: - .long 0x4104a200 @ ARM 1020T (Architecture v5T) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1020_setup, __arm1020_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm1020_name - .long arm1020_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1020_cache_fns - .size __arm1020_proc_info, . - __arm1020_proc_info diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S deleted file mode 100644 index 5d8a8339e09a4ea7c90093f007a37a3f54bba99e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1020e.S +++ /dev/null @@ -1,475 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020 - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm1020e. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1020e_proc_init() - */ -ENTRY(cpu_arm1020e_proc_init) - ret lr - -/* - * cpu_arm1020e_proc_fin() - */ -ENTRY(cpu_arm1020e_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1020e_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
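The crval record above encodes the control-register recipe used by __arm1020_setup: a mask of bits to force clear, plus the value to set for MMU ("mmuset") and MMU-less ("ucset") configurations. The bic/orr pair reduces to two C operations:

    #include <stdint.h>

    /* clear = bits to force off, set = bits to force on (mmuset or ucset). */
    static uint32_t setup_control_register(uint32_t ctrl, uint32_t clear,
                                           uint32_t set)
    {
        ctrl &= ~clear;   /* bic r0, r0, r5 */
        ctrl |= set;      /* orr r0, r0, r6 */
        return ctrl;
    }

For the ARM1020 with the MMU enabled this is setup_control_register(ctrl, 0x0000593f, 0x00003935), using the constants from arm1020_crval above.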
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1020e_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1020e_reset) - .popsection - -/* - * cpu_arm1020e_do_idle() - */ - .align 5 -ENTRY(cpu_arm1020e_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1020e_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1020e_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1020e_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1020e_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1020e_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_coherent_kern_range) - /* FALLTHROUGH */ -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1020e_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020e_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1020e_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1020e_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020e_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1020e_dma_clean_range - bcs arm1020e_dma_inv_range - b arm1020e_dma_flush_range -ENDPROC(arm1020e_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1020e_dma_unmap_area) - ret lr -ENDPROC(arm1020e_dma_unmap_area) - - .globl arm1020e_flush_kern_cache_louis - .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1020e - - .align 5 -ENTRY(cpu_arm1020e_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1020e_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1020e_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r3, c7, c10, 4 - mov r1, #0xF @ 16 segments -1: mov r3, #0x3F @ 64 entries -2: mov ip, r3, LSL #26 @ shift up entry - orr ip, ip, r1, LSL #5 @ shift in/up index - mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry - mov ip, #0 - subs r3, r3, #1 - cmp r3, #0 - bge 2b @ entries 3F to 0 - subs r1, r1, #1 - cmp r1, #0 - bge 1b @ segments 15 to 0 - -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1020e_set_pte(ptep, pte) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1020e_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1020e_setup, #function -__arm1020e_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm1020e_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1020e_setup, . 
- __arm1020e_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - */ - .type arm1020e_crval, #object -arm1020e_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_arm1020e_name, "ARM1020E" - - .align - - .section ".proc.info.init", #alloc - - .type __arm1020e_proc_info,#object -__arm1020e_proc_info: - .long 0x4105a200 @ ARM 1020TE (Architecture v5TE) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1020e_setup, __arm1020e_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP - .long cpu_arm1020e_name - .long arm1020e_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1020e_cache_fns - .size __arm1020e_proc_info, . - __arm1020e_proc_info diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S deleted file mode 100644 index b3dd95c345e482f20ac898f1610bdcb2ceb6a815..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1022.S +++ /dev/null @@ -1,469 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM1022E. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1022_proc_init() - */ -ENTRY(cpu_arm1022_proc_init) - ret lr - -/* - * cpu_arm1022_proc_fin() - */ -ENTRY(cpu_arm1022_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1022_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1022_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1022_reset) - .popsection - -/* - * cpu_arm1022_do_idle() - */ - .align 5 -ENTRY(cpu_arm1022_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1022_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1022_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1022_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1022_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1022_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1022_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1022_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1022_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1022_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1022_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1022_dma_clean_range - bcs arm1022_dma_inv_range - b arm1022_dma_flush_range -ENDPROC(arm1022_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1022_dma_unmap_area) - ret lr -ENDPROC(arm1022_dma_unmap_area) - - .globl arm1022_flush_kern_cache_louis - .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1022 - - .align 5 -ENTRY(cpu_arm1022_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1022_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1022_switch_mm) -#ifdef CONFIG_MMU -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 15 to 0 -#endif - mov r1, #0 -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1022_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1022_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1022_setup, #function -__arm1022_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm1022_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.............. -#endif - ret lr - .size __arm1022_setup, . 
- __arm1022_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - * - */ - .type arm1022_crval, #object -arm1022_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_arm1022_name, "ARM1022" - - .align - - .section ".proc.info.init", #alloc - - .type __arm1022_proc_info,#object -__arm1022_proc_info: - .long 0x4105a220 @ ARM 1022E (v5TE) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1022_setup, __arm1022_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP - .long cpu_arm1022_name - .long arm1022_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1022_cache_fns - .size __arm1022_proc_info, . - __arm1022_proc_info diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S deleted file mode 100644 index ac5afde12f35cfe09eccd8fa449f0ae4b7057962..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm1026.S +++ /dev/null @@ -1,463 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S - * - * Copyright (C) 2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM1026EJ-S. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 16 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 32768 - - .text -/* - * cpu_arm1026_proc_init() - */ -ENTRY(cpu_arm1026_proc_init) - ret lr - -/* - * cpu_arm1026_proc_fin() - */ -ENTRY(cpu_arm1026_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm1026_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
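The crval record above (arm1022_crval) pairs with the bic/orr sequence at the end of __arm1022_setup: `clear` is the mask of CP15 control-register bits the setup code claims, and `mmuset` (or `ucset` for uncached/MMU-less configurations) is the value forced into those bits. In C terms, a sketch with the constants copied from the record above:

```c
#include <stdint.h>

/* Constants from arm1022_crval above. */
#define CR_CLEAR  0x00007f3fu /* control bits owned by the setup code */
#define CR_MMUSET 0x00003935u /* value forced in for MMU kernels      */
#define CR_UCSET  0x00001930u /* value for uncached / no-MMU builds   */

/* What "bic r0, r0, r5; orr r0, r0, r6" computes on the control reg. */
static inline uint32_t arm1022_ctrl_value(uint32_t cr, int mmu)
{
	cr &= ~CR_CLEAR;                  /* bic: drop every owned bit  */
	cr |= mmu ? CR_MMUSET : CR_UCSET; /* orr: force the chosen set  */
	return cr;
}
```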
- * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm1026_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm1026_reset) - .popsection - -/* - * cpu_arm1026_do_idle() - */ - .align 5 -ENTRY(cpu_arm1026_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* ================================= CACHE ================================ */ - - .align 5 - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm1026_flush_icache_all) -#ifndef CONFIG_CPU_ICACHE_DISABLE - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache -#endif - ret lr -ENDPROC(arm1026_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(arm1026_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm1026_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate - bne 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for this space - */ -ENTRY(arm1026_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - tst r2, #VM_EXEC -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache -#endif - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_coherent_kern_range) - /* FALLTHROUGH */ -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. 
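arm1026_flush_user_cache_range above opens by comparing the span against CACHE_DLIMIT and branching to __flush_whole_cache when the range is large, the idea being that past roughly 32 KiB of line-by-line maintenance a whole-cache operation is cheaper. A sketch of that dispatch, with hypothetical helpers standing in for the assembler paths:

```c
#include <stdint.h>

#define CACHE_DLIMIT    32768 /* from the #define earlier in this file */
#define CACHE_DLINESIZE 32

void flush_whole_dcache(void);              /* hypothetical shims for */
void dcache_clean_inv_line(uintptr_t addr); /* the assembler paths    */

static void flush_user_range_sketch(uintptr_t start, uintptr_t end)
{
	/* "sub r3, r1, r0; cmp r3, #CACHE_DLIMIT; bhs __flush_whole_cache" */
	if (end - start >= CACHE_DLIMIT) {
		flush_whole_dcache();
		return;
	}
	for (uintptr_t p = start; p < end; p += CACHE_DLINESIZE)
		dcache_clean_inv_line(p);
}
```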
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_coherent_user_range) - mov ip, #0 - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm1026_flush_kern_dcache_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1026_dma_inv_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm1026_dma_clean_range: - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
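Each of these proc files ends its DMA block with the same dma_map_area dispatcher (arm1022's appeared earlier; arm1026's follows below): a single cmp against DMA_TO_DEVICE routes the call to clean, invalidate, or full flush based on the direction argument. Equivalent C, assuming the conventional ordering of the kernel's DMA direction codes (bidirectional = 0, to-device = 1, from-device = 2):

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative direction codes, mirroring enum dma_data_direction. */
enum dma_dir { DIR_BIDIRECTIONAL = 0, DIR_TO_DEVICE = 1, DIR_FROM_DEVICE = 2 };

void dma_clean_range(uintptr_t s, uintptr_t e); /* hypothetical shims    */
void dma_inv_range(uintptr_t s, uintptr_t e);   /* over the asm routines */
void dma_flush_range(uintptr_t s, uintptr_t e);

static void dma_map_area_sketch(uintptr_t start, size_t size, enum dma_dir dir)
{
	uintptr_t end = start + size;        /* "add r1, r1, r0"            */

	if (dir == DIR_TO_DEVICE)            /* beq ..._dma_clean_range     */
		dma_clean_range(start, end);
	else if (dir > DIR_TO_DEVICE)        /* bcs: unsigned >=, here only */
		dma_inv_range(start, end);   /* from-device reaches this    */
	else                                 /* b ..._dma_flush_range       */
		dma_flush_range(start, end); /* bidirectional               */
}
```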
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm1026_dma_flush_range) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1026_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm1026_dma_clean_range - bcs arm1026_dma_inv_range - b arm1026_dma_flush_range -ENDPROC(arm1026_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm1026_dma_unmap_area) - ret lr -ENDPROC(arm1026_dma_unmap_area) - - .globl arm1026_flush_kern_cache_louis - .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm1026 - - .align 5 -ENTRY(cpu_arm1026_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_DISABLE - mov ip, #0 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm1026_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm1026_switch_mm) -#ifdef CONFIG_MMU - mov r1, #0 -#ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate - bne 1b -#endif -#ifndef CONFIG_CPU_ICACHE_DISABLE - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache -#endif - mcr p15, 0, r1, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm1026_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm1026_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_DISABLE - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif -#endif /* CONFIG_MMU */ - ret lr - - .type __arm1026_setup, #function -__arm1026_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 - mcr p15, 0, r4, c2, c0 @ load page table pointer -#endif -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ explicitly disable writeback - mcr p15, 7, r0, c15, c0, 0 -#endif - adr r5, arm1026_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .R.. .... .... .... -#endif - ret lr - .size __arm1026_setup, . 
- __arm1026_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..11 0101 - * - */ - .type arm1026_crval, #object -arm1026_crval: - crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort - - .section .rodata - - string cpu_arch_name, "armv5tej" - string cpu_elf_name, "v5" - .align - string cpu_arm1026_name, "ARM1026EJ-S" - .align - - .section ".proc.info.init", #alloc - - .type __arm1026_proc_info,#object -__arm1026_proc_info: - .long 0x4106a260 @ ARM 1026EJ-S (v5TEJ) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm1026_setup, __arm1026_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA - .long cpu_arm1026_name - .long arm1026_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm1026_cache_fns - .size __arm1026_proc_info, . - __arm1026_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S deleted file mode 100644 index c99d24363f32ee64754068b971b059be36ee8f05..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm720.S +++ /dev/null @@ -1,205 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720 - * - * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) - * Rob Scott (rscott@mtrob.fdns.net) - * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2004. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM720T. The ARM720T has a writethrough IDC - * cache, so we don't need to clean it. - * - * Changelog: - * 05-09-2000 SJH Created by moving 720 specific functions - * out of 'proc-arm6,7.S' per RMK discussion - * 07-25-2000 SJH Added idle function. - * 08-25-2000 DBS Updated for integration of ARM Ltd version. - * 04-20-2004 HSC modified for non-paged memory management mode. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * Function: arm720_proc_init (void) - * : arm720_proc_fin (void) - * - * Notes : This processor does not require these - */ -ENTRY(cpu_arm720_dcache_clean_area) -ENTRY(cpu_arm720_proc_init) - ret lr - -ENTRY(cpu_arm720_proc_fin) - mrc p15, 0, r0, c1, c0, 0 - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * Function: arm720_proc_do_idle(void) - * Params : r0 = unused - * Purpose : put the processor in proper idle mode - */ -ENTRY(cpu_arm720_do_idle) - ret lr - -/* - * Function: arm720_switch_mm(unsigned long pgd_phys) - * Params : pgd_phys Physical address of page table - * Purpose : Perform a task switch, saving the old process' state and restoring - * the new. 
- */ -ENTRY(cpu_arm720_switch_mm) -#ifdef CONFIG_MMU - mov r1, #0 - mcr p15, 0, r1, c7, c7, 0 @ invalidate cache - mcr p15, 0, r0, c2, c0, 0 @ update page table ptr - mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) -#endif - ret lr - -/* - * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) - * Params : r0 = Address to set - * : r1 = value to set - * Purpose : Set a PTE and flush it out of any WB cache - */ - .align 5 -ENTRY(cpu_arm720_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 -#endif - ret lr - -/* - * Function: arm720_reset - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm720_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate cache -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) -#endif - mrc p15, 0, ip, c1, c0, 0 @ get ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x2100 @ ..v....s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm720_reset) - .popsection - - .type __arm710_setup, #function -__arm710_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ invalidate caches -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) -#endif - mrc p15, 0, r0, c1, c0 @ get control register - ldr r5, arm710_cr1_clear - bic r0, r0, r5 - ldr r5, arm710_cr1_set - orr r0, r0, r5 - ret lr @ __ret (head.S) - .size __arm710_setup, . - __arm710_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .... 0001 ..11 1101 - * - */ - .type arm710_cr1_clear, #object - .type arm710_cr1_set, #object -arm710_cr1_clear: - .word 0x0f3f -arm710_cr1_set: - .word 0x013d - - .type __arm720_setup, #function -__arm720_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7, 0 @ invalidate caches -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) -#endif - adr r5, arm720_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr @ __ret (head.S) - .size __arm720_setup, . - __arm720_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..1. 1001 ..11 1101 - * - */ - .type arm720_crval, #object -arm720_crval: - crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm710_name, "ARM710T" - string cpu_arm720_name, "ARM720T" - - .align - -/* - * See for a definition of this structure. - */ - - .section ".proc.info.init", #alloc - -.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn \cpu_flush, __\name\()_proc_info @ cpu_flush - .long cpu_arch_name @ arch_name - .long cpu_elf_name @ elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long \cpu_name - .long arm720_processor_functions - .long v4_tlb_fns - .long v4wt_user_fns - .long v4_cache_fns - .size __\name\()_proc_info, . 
- __\name\()_proc_info
-.endm
-
-	arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup
-	arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
deleted file mode 100644
index 1b4a3838393fbda090d9528d834947bfae96d4ae..0000000000000000000000000000000000000000
--- a/arch/arm/mm/proc-arm740.S
+++ /dev/null
@@ -1,147 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/arch/arm/mm/arm740.S: utility functions for ARM740
- *
- * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com)
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "proc-macros.S"
-
-	.text
-/*
- * cpu_arm740_proc_init()
- * cpu_arm740_do_idle()
- * cpu_arm740_dcache_clean_area()
- * cpu_arm740_switch_mm()
- *
- * These are not required.
- */
-ENTRY(cpu_arm740_proc_init)
-ENTRY(cpu_arm740_do_idle)
-ENTRY(cpu_arm740_dcache_clean_area)
-ENTRY(cpu_arm740_switch_mm)
-	ret	lr
-
-/*
- * cpu_arm740_proc_fin()
- */
-ENTRY(cpu_arm740_proc_fin)
-	mrc	p15, 0, r0, c1, c0, 0
-	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
-	bic	r0, r0, #0x0000000c		@ w-buffer/cache
-	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
-	ret	lr
-
-/*
- * cpu_arm740_reset(loc)
- * Params  : r0 = address to jump to
- * Notes   : This sets up everything for a reset
- */
-	.pushsection	.idmap.text, "ax"
-ENTRY(cpu_arm740_reset)
-	mov	ip, #0
-	mcr	p15, 0, ip, c7, c0, 0		@ invalidate cache
-	mrc	p15, 0, ip, c1, c0, 0		@ get ctrl register
-	bic	ip, ip, #0x0000000c		@ ............wc..
-	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
-	ret	r0
-ENDPROC(cpu_arm740_reset)
-	.popsection
-
-	.type	__arm740_setup, #function
-__arm740_setup:
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c0, 0		@ invalidate caches
-
-	mcr	p15, 0, r0, c6, c3		@ disable area 3~7
-	mcr	p15, 0, r0, c6, c4
-	mcr	p15, 0, r0, c6, c5
-	mcr	p15, 0, r0, c6, c6
-	mcr	p15, 0, r0, c6, c7
-
-	mov	r0, #0x0000003F			@ base = 0, size = 4GB
-	mcr	p15, 0, r0, c6, c0		@ set area 0, default
-
-	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
-	ldr	r3, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
-	mov	r4, #10				@ 11 is the minimum (4KB)
-1:	add	r4, r4, #1			@ area size *= 2
-	movs	r3, r3, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r4, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-	mcr	p15, 0, r0, c6, c1		@ set area 1, RAM
-
-	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
-	ldr	r3, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
-	cmp	r3, #0
-	moveq	r0, #0
-	beq	2f
-	mov	r4, #10				@ 11 is the minimum (4KB)
-1:	add	r4, r4, #1			@ area size *= 2
-	movs	r3, r3, lsr #1
-	bne	1b				@ count not zero r-shift
-	orr	r0, r0, r4, lsl #1		@ the area register value
-	orr	r0, r0, #1			@ set enable bit
-2:	mcr	p15, 0, r0, c6, c2		@ set area 2, ROM/FLASH
-
-	mov	r0, #0x06
-	mcr	p15, 0, r0, c2, c0		@ Region 1&2 cacheable
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mov	r0, #0x00			@ disable whole write buffer
-#else
-	mov	r0, #0x02			@ Region 1 write buffered
-#endif
-	mcr	p15, 0, r0, c3, c0
-
-	mov	r0, #0x10000
-	sub	r0, r0, #1			@ r0 = 0xffff
-	mcr	p15, 0, r0, c5, c0		@ all read/write access
-
-	mrc	p15, 0, r0, c1, c0		@ get control register
-	bic	r0, r0, #0x3F000000		@ set to standard caching mode
-						@ need some benchmark
-	orr	r0, r0, #0x0000000d		@ MPU/Cache/WB
-
-	ret	lr
-
-	.size	__arm740_setup, . 
- __arm740_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_arm740_name, "ARM740T" - - .align - - .section ".proc.info.init", #alloc - .type __arm740_proc_info,#object -__arm740_proc_info: - .long 0x41807400 - .long 0xfffffff0 - .long 0 - .long 0 - initfn __arm740_setup, __arm740_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT - .long cpu_arm740_name - .long arm740_processor_functions - .long 0 - .long 0 - .long v4_cache_fns @ cache model - .size __arm740_proc_info, . - __arm740_proc_info diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S deleted file mode 100644 index 17a4687065c7f9cc6a7b2fad4d0605e2767fbca6..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm7tdmi.S +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI - * - * Copyright (C) 2003-2006 Hyok S. Choi - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - - .text -/* - * cpu_arm7tdmi_proc_init() - * cpu_arm7tdmi_do_idle() - * cpu_arm7tdmi_dcache_clean_area() - * cpu_arm7tdmi_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm7tdmi_proc_init) -ENTRY(cpu_arm7tdmi_do_idle) -ENTRY(cpu_arm7tdmi_dcache_clean_area) -ENTRY(cpu_arm7tdmi_switch_mm) - ret lr - -/* - * cpu_arm7tdmi_proc_fin() - */ -ENTRY(cpu_arm7tdmi_proc_fin) - ret lr - -/* - * Function: cpu_arm7tdmi_reset(loc) - * Params : loc(r0) address to jump to - * Purpose : Sets up everything for a reset and jump to the location for soft reset. - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm7tdmi_reset) - ret r0 -ENDPROC(cpu_arm7tdmi_reset) - .popsection - - .type __arm7tdmi_setup, #function -__arm7tdmi_setup: - ret lr - .size __arm7tdmi_setup, . - __arm7tdmi_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm7tdmi_name, "ARM7TDMI" - string cpu_triscenda7_name, "Triscend-A7x" - string cpu_at91_name, "Atmel-AT91M40xxx" - string cpu_s3c3410_name, "Samsung-S3C3410" - string cpu_s3c44b0x_name, "Samsung-S3C44B0x" - string cpu_s3c4510b_name, "Samsung-S3C4510B" - string cpu_s3c4530_name, "Samsung-S3C4530" - string cpu_netarm_name, "NETARM" - - .align - - .section ".proc.info.init", #alloc - -.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ - extra_hwcaps=0 - .type __\name\()_proc_info, #object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long 0 - .long 0 - initfn __arm7tdmi_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) - .long \cpu_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __\name\()_proc_info, . 
- __\name\()_proc_info -.endm - - arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ - cpu_arm7tdmi_name - arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ - cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ - cpu_at91_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ - cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ - cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ - cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB - arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ - cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S deleted file mode 100644 index 298c76b47749f962ce8ba46072df1af7dfefecac..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm920.S +++ /dev/null @@ -1,466 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm920. - * - * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 8 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. - */ -#define CACHE_DLIMIT 65536 - - - .text -/* - * cpu_arm920_proc_init() - */ -ENTRY(cpu_arm920_proc_init) - ret lr - -/* - * cpu_arm920_proc_fin() - */ -ENTRY(cpu_arm920_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm920_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm920_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm920_reset) - .popsection - -/* - * cpu_arm920_do_idle() - */ - .align 5 -ENTRY(cpu_arm920_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - - -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm920_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm920_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
- */ -ENTRY(arm920_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm920_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags for address space - */ -ENTRY(arm920_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm920_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm920_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm920_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm920_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm920_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm920_dma_clean_range - bcs arm920_dma_inv_range - b arm920_dma_flush_range -ENDPROC(arm920_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm920_dma_unmap_area) - ret lr -ENDPROC(arm920_dma_unmap_area) - - .globl arm920_flush_kern_cache_louis - .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm920 -#endif - - -ENTRY(cpu_arm920_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm920_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm920_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -@ && Re-written to use Index Ops. 
-@ && Uses registers r1, r3 and ip - - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm920_set_pte(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm920_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ -.globl cpu_arm920_suspend_size -.equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm920_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm920_do_suspend) - -ENTRY(cpu_arm920_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_arm920_do_resume) -#endif - - .type __arm920_setup, #function -__arm920_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm920_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __arm920_setup, . - __arm920_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 0101 - * - */ - .type arm920_crval, #object -arm920_crval: - crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm920_name, "ARM920T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm920_proc_info,#object -__arm920_proc_info: - .long 0x41009200 - .long 0xff00fff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm920_setup, __arm920_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm920_name - .long arm920_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - .long arm920_cache_fns -#else - .long v4wt_cache_fns -#endif - .size __arm920_proc_info, . 
- __arm920_proc_info diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S deleted file mode 100644 index 824be3a0bc23820149f99bc0a6ef8f66dfbc7560..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm922.S +++ /dev/null @@ -1,444 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * Copyright (C) 2001 Altera Corporation - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm922. - * - * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 32 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 4 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 64 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. (I think this should - * be 32768). - */ -#define CACHE_DLIMIT 8192 - - - .text -/* - * cpu_arm922_proc_init() - */ -ENTRY(cpu_arm922_proc_init) - ret lr - -/* - * cpu_arm922_proc_fin() - */ -ENTRY(cpu_arm922_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm922_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm922_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm922_reset) - .popsection - -/* - * cpu_arm922_do_idle() - */ - .align 5 -ENTRY(cpu_arm922_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - - -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm922_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm922_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm922_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
- */ -ENTRY(arm922_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm922_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm922_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm922_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
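The __flush_whole_cache loops for the ARM920 and ARM922 (and the matching loop in cpu_arm920_switch_mm) walk the data cache by index rather than by address: the segment number sits in the bits starting at position 5 and the line index in the bits starting at position 26 of the word handed to the clean+invalidate-by-index operation, and both counters run down to zero. A sketch of how those index words are generated (segment count per the ARM920 #defines; the ARM922 file #defines 4 segments):

```c
#include <stdint.h>

#define CACHE_DSEGMENTS 8  /* ARM920; ARM922 uses 4 */
#define CACHE_DENTRIES  64

/* Hypothetical shim for "mcr p15, 0, rX, c7, c14, 2". */
void dcache_clean_inv_index(uint32_t index);

static void flush_whole_dcache_sketch(void)
{
	for (int seg = CACHE_DSEGMENTS - 1; seg >= 0; seg--)        /* subs/bcs 1b */
		for (int idx = CACHE_DENTRIES - 1; idx >= 0; idx--) /* subs/bcs 2b */
			dcache_clean_inv_index(((uint32_t)seg << 5) |
					       ((uint32_t)idx << 26));
}
```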
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm922_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm922_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm922_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm922_dma_clean_range - bcs arm922_dma_inv_range - b arm922_dma_flush_range -ENDPROC(arm922_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm922_dma_unmap_area) - ret lr -ENDPROC(arm922_dma_unmap_area) - - .globl arm922_flush_kern_cache_louis - .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm922 -#endif - - -ENTRY(cpu_arm922_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm922_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm922_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -@ && Re-written to use Index Ops. -@ && Uses registers r1, r3 and ip - - mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 5 - bcs 1b @ segments 7 to 0 -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm922_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm922_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif /* CONFIG_MMU */ - ret lr - - .type __arm922_setup, #function -__arm922_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, arm922_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __arm922_setup, . 
- __arm922_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 0101 - * - */ - .type arm922_crval, #object -arm922_crval: - crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm922_name, "ARM922T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm922_proc_info,#object -__arm922_proc_info: - .long 0x41009220 - .long 0xff00fff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm922_setup, __arm922_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm922_name - .long arm922_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - .long arm922_cache_fns -#else - .long v4wt_cache_fns -#endif - .size __arm922_proc_info, . - __arm922_proc_info diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S deleted file mode 100644 index d40cff8f102c2b5c7d603f74ceb9215693a33010..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm925.S +++ /dev/null @@ -1,509 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/arm925.S: MMU functions for ARM925 - * - * Copyright (C) 1999,2000 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * Copyright (C) 2002 RidgeRun, Inc. - * Copyright (C) 2002-2003 MontaVista Software, Inc. - * - * Update for Linux-2.6 and cache flush improvements - * Copyright (C) 2004 Nokia Corporation by Tony Lindgren - * - * hacked for non-paged-MM by Hyok S. Choi, 2004. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm925. - * - * CONFIG_CPU_ARM925_CPU_IDLE -> nohlt - * - * Some additional notes based on deciphering the TI TRM on OMAP-5910: - * - * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush - * entry mode" must be 0 to flush the entries in both segments - * at once. This is the default value. See TRM 2-20 and 2-24 for - * more information. - * - * NOTE2: Default is the "D-cache clean and flush entry mode". It looks - * like the "Transparent mode" must be on for partial cache flushes - * to work in this mode. This mode only works with 16-bit external - * memory. See TRM 2-24 for more information. - * - * NOTE3: Write-back cache flushing seems to be flakey with devices using - * direct memory access, such as USB OHCI. The workaround is to use - * write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is - * the default for OMAP-1510). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * The size of one data cache line. - */ -#define CACHE_DLINESIZE 16 - -/* - * The number of data cache segments. - */ -#define CACHE_DSEGMENTS 2 - -/* - * The number of lines in a cache segment. - */ -#define CACHE_DENTRIES 256 - -/* - * This is the size at which it becomes more efficient to - * clean the whole cache, rather than using the individual - * cache line maintenance instructions. 
- */ -#define CACHE_DLIMIT 8192 - - .text -/* - * cpu_arm925_proc_init() - */ -ENTRY(cpu_arm925_proc_init) - ret lr - -/* - * cpu_arm925_proc_fin() - */ -ENTRY(cpu_arm925_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm925_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm925_reset) - /* Send software reset to MPU and DSP */ - mov ip, #0xff000000 - orr ip, ip, #0x00fe0000 - orr ip, ip, #0x0000ce00 - mov r4, #1 - strh r4, [ip, #0x10] -ENDPROC(cpu_arm925_reset) - .popsection - - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 - -/* - * cpu_arm925_do_idle() - * - * Called with IRQs disabled - */ - .align 10 -ENTRY(cpu_arm925_do_idle) - mov r0, #0 - mrc p15, 0, r1, c1, c0, 0 @ Read control register - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - bic r2, r1, #1 << 12 - mcr p15, 0, r2, c1, c0, 0 @ Disable I cache - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm925_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm925_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm925_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm925_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else - /* Flush entries in both segments at once, see NOTE1 above */ - mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment -2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index - subs r3, r3, #1 << 4 - bcs 2b @ entries 255 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. 
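In the range-flush routine below, a single "tst r2, #VM_EXEC" sets the flags once per pass and the following mcrne instructions then skip the I-cache work for non-executable mappings; the loop body is also unrolled two lines per iteration. The control flow, as a C sketch (the VM_EXEC value is shown for illustration; it comes from the mm headers):

```c
#include <stdint.h>

#define VM_EXEC         0x00000004ul /* illustrative; defined in the mm headers */
#define CACHE_DLINESIZE 16           /* the ARM925 uses 16-byte D-cache lines   */

void dcache_clean_inv_line(uintptr_t addr); /* hypothetical shims */
void icache_inv_line(uintptr_t addr);
void drain_write_buffer(void);

static void flush_user_range_sketch(uintptr_t start, uintptr_t end,
				    unsigned long vm_flags)
{
	for (uintptr_t p = start; p < end; p += CACHE_DLINESIZE) {
		dcache_clean_inv_line(p);
		if (vm_flags & VM_EXEC)     /* tst r2, #VM_EXEC / mcrne */
			icache_inv_line(p);
	}
	if (vm_flags & VM_EXEC)
		drain_write_buffer();       /* mcrne ... c7, c10, 4 */
}
```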
- * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm925_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm925_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm925_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm925_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm925_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
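Per NOTE3 earlier in this file, these routines are often built with CONFIG_CPU_DCACHE_WRITETHROUGH (the OMAP-1510 default). With a write-through D-cache no line is ever dirty, so in the routine that follows the clean loop compiles away entirely and only the write-buffer drain remains. A sketch of that shape:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 16

void dcache_clean_line(uintptr_t addr); /* hypothetical shim */
void drain_write_buffer(void);

static void dma_clean_range_sketch(uintptr_t start, uintptr_t end)
{
#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
	/* Write-back build only: push dirty lines out to memory. */
	for (uintptr_t p = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
	     p < end; p += CACHE_DLINESIZE)
		dcache_clean_line(p);
#endif
	drain_write_buffer(); /* still needed either way */
}
```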
- *
- * - start - virtual start address
- * - end - virtual end address
- *
- * (same as v4wb)
- */
-arm925_dma_clean_range:
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	bic	r0, r0, #CACHE_DLINESIZE - 1
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-	add	r0, r0, #CACHE_DLINESIZE
-	cmp	r0, r1
-	blo	1b
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/*
- * dma_flush_range(start, end)
- *
- * Clean and invalidate the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(arm925_dma_flush_range)
-	bic	r0, r0, #CACHE_DLINESIZE - 1
-1:
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
-#else
-	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
-#endif
-	add	r0, r0, #CACHE_DLINESIZE
-	cmp	r0, r1
-	blo	1b
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/*
- * dma_map_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-ENTRY(arm925_dma_map_area)
-	add	r1, r1, r0
-	cmp	r2, #DMA_TO_DEVICE
-	beq	arm925_dma_clean_range
-	bcs	arm925_dma_inv_range
-	b	arm925_dma_flush_range
-ENDPROC(arm925_dma_map_area)
-
-/*
- * dma_unmap_area(start, size, dir)
- * - start - kernel virtual start address
- * - size - size of region
- * - dir - DMA direction
- */
-ENTRY(arm925_dma_unmap_area)
-	ret	lr
-ENDPROC(arm925_dma_unmap_area)
-
-	.globl	arm925_flush_kern_cache_louis
-	.equ	arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
-
-	@ define struct cpu_cache_fns (see and proc-macros.S)
-	define_cache_functions arm925
-
-ENTRY(cpu_arm925_dcache_clean_area)
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-	add	r0, r0, #CACHE_DLINESIZE
-	subs	r1, r1, #CACHE_DLINESIZE
-	bhi	1b
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-	ret	lr
-
-/* =============================== PageTable ============================== */
-
-/*
- * cpu_arm925_switch_mm(pgd)
- *
- * Set the translation base pointer to be as described by pgd.
- *
- * pgd: new page tables
- */
-	.align	5
-ENTRY(cpu_arm925_switch_mm)
-#ifdef CONFIG_MMU
-	mov	ip, #0
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
-#else
-	/* Flush entries in both segments at once, see NOTE1 above */
-	mov	r3, #(CACHE_DENTRIES - 1) << 4	@ 256 entries in segment
-2:	mcr	p15, 0, r3, c7, c14, 2		@ clean & invalidate D index
-	subs	r3, r3, #1 << 4
-	bcs	2b				@ entries 255 to 0
-#endif
-	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
-	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
-	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
-	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
-#endif
-	ret	lr
-
-/*
- * cpu_arm925_set_pte_ext(ptep, pte, ext)
- *
- * Set a PTE and flush it out
- */
-	.align	5
-ENTRY(cpu_arm925_set_pte_ext)
-#ifdef CONFIG_MMU
-	armv3_set_pte_ext
-	mov	r0, r0
-#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-#endif
-	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-#endif /* CONFIG_MMU */
-	ret	lr
-
-	.type	__arm925_setup, #function
-__arm925_setup:
-	mov	r0, #0
-
-	/* Transparent on, D-cache clean & flush mode. 
See NOTE2 above */ - orr r0,r0,#1 << 1 @ transparent mode on - mcr p15, 0, r0, c15, c1, 0 @ write TI config register - - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ disable write-back on caches explicitly - mcr p15, 7, r0, c15, c0, 0 -#endif - - adr r5, arm925_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .1.. .... .... .... -#endif - ret lr - .size __arm925_setup, . - __arm925_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 0001 ..11 1101 - * - */ - .type arm925_crval, #object -arm925_crval: - crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 - - __INITDATA - @ define struct processor (see and proc-macros.S) - define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm925_name, "ARM925T" - - .align - - .section ".proc.info.init", #alloc - -.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm925_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm925_name - .long arm925_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm925_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name - arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S deleted file mode 100644 index f3cd08f353f00a92e186a075ee7ab17ec41f65a1..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm926.S +++ /dev/null @@ -1,488 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S - * - * Copyright (C) 1999-2001 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * These are the low level assembler for performing cache and TLB - * functions on the arm926. - * - * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define CACHE_DLIMIT 16384 - -/* - * the cache line size of the I and D cache - */ -#define CACHE_DLINESIZE 32 - - .text -/* - * cpu_arm926_proc_init() - */ -ENTRY(cpu_arm926_proc_init) - ret lr - -/* - * cpu_arm926_proc_fin() - */ -ENTRY(cpu_arm926_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. 
- mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm926_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm926_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm926_reset) - .popsection - -/* - * cpu_arm926_do_idle() - * - * Called with IRQs disabled - */ - .align 10 -ENTRY(cpu_arm926_do_idle) - mov r0, #0 - mrc p15, 0, r1, c1, c0, 0 @ Read control register - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - bic r2, r1, #1 << 12 - mrs r3, cpsr @ Disable FIQs while Icache - orr ip, r3, #PSR_F_BIT @ is disabled - msr cpsr_c, ip - mcr p15, 0, r2, c1, c0, 0 @ Disable I cache - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - msr cpsr_c, r3 @ Restore FIQ state - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm926_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm926_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(arm926_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm926_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm926_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. 
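The arm9xx coherent_*_range implementations here all share one by-MVA pattern: align the start address down to a cache line, then clean the D-cache entry and invalidate the I-cache entry for each line, and finally drain the write buffer. A minimal C model of that loop follows, as a reading aid only; cp15_clean_dline(), cp15_inv_iline() and cp15_drain_wb() are hypothetical stand-ins for the mcr instructions, which C cannot express directly.

#include <stdint.h>

#define CACHE_DLINESIZE 32u  /* line size assumed by proc-arm926.S */

static void cp15_clean_dline(uintptr_t mva) { (void)mva; } /* mcr p15, 0, Rd, c7, c10, 1 */
static void cp15_inv_iline(uintptr_t mva)   { (void)mva; } /* mcr p15, 0, Rd, c7, c5, 1  */
static void cp15_drain_wb(void)             { }            /* mcr p15, 0, Rd, c7, c10, 4 */

/* Make [start, end) coherent between the D-cache and the I-cache. */
static void coherent_range(uintptr_t start, uintptr_t end)
{
    start &= ~(uintptr_t)(CACHE_DLINESIZE - 1);  /* bic r0, r0, #CACHE_DLINESIZE - 1 */
    do {
        cp15_clean_dline(start);   /* push freshly written code out to memory */
        cp15_inv_iline(start);     /* force the I-cache to refetch it */
        start += CACHE_DLINESIZE;
    } while (start < end);         /* cmp r0, r1; blo 1b */
    cp15_drain_wb();               /* complete the writes before returning */
}

The clean-before-invalidate order is the point: on a Harvard VIVT cache, code written through the D-cache is invisible to instruction fetch until it reaches memory and the stale I-cache lines are discarded.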
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm926_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm926_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -arm926_dma_clean_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
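All of the *_dma_map_area entry points in these files dispatch on the DMA direction with the same cmp/beq/bcs ladder. Below is a hedged C sketch of that dispatch, using the direction values from mainline linux/dma-direction.h; the three range helpers are empty stubs standing in for the per-CPU routines.

#include <stddef.h>

enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2 };

/* stand-ins for e.g. arm926_dma_clean_range and friends */
static void dma_clean_range(char *s, char *e) { (void)s; (void)e; }
static void dma_inv_range(char *s, char *e)   { (void)s; (void)e; }
static void dma_flush_range(char *s, char *e) { (void)s; (void)e; }

static void dma_map_area(char *start, size_t size, enum dma_data_direction dir)
{
    char *end = start + size;         /* add r1, r1, r0 */
    if (dir == DMA_TO_DEVICE)         /* cmp r2, #DMA_TO_DEVICE; beq */
        dma_clean_range(start, end);  /* CPU wrote the buffer: push it out */
    else if (dir > DMA_TO_DEVICE)     /* bcs, i.e. DMA_FROM_DEVICE */
        dma_inv_range(start, end);    /* device will write: discard stale lines */
    else                              /* DMA_BIDIRECTIONAL falls through */
        dma_flush_range(start, end);  /* clean and invalidate */
}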
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm926_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry -#else - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm926_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm926_dma_clean_range - bcs arm926_dma_inv_range - b arm926_dma_flush_range -ENDPROC(arm926_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm926_dma_unmap_area) - ret lr -ENDPROC(arm926_dma_unmap_area) - - .globl arm926_flush_kern_cache_louis - .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm926 - -ENTRY(cpu_arm926_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_arm926_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_arm926_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else -@ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate - bne 1b -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - ret lr - -/* - * cpu_arm926_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_arm926_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ -.globl cpu_arm926_suspend_size -.equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_arm926_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_arm926_do_suspend) - -ENTRY(cpu_arm926_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_arm926_do_resume) -#endif - - .type __arm926_setup, #function -__arm926_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - -#ifdef 
CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #4 @ disable write-back on caches explicitly - mcr p15, 7, r0, c15, c0, 0 -#endif - - adr r5, arm926_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x4000 @ .1.. .... .... .... -#endif - ret lr - .size __arm926_setup, . - __arm926_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 0001 ..11 0101 - * - */ - .type arm926_crval, #object -arm926_crval: - crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5tej" - string cpu_elf_name, "v5" - string cpu_arm926_name, "ARM926EJ-S" - - .align - - .section ".proc.info.init", #alloc - - .type __arm926_proc_info,#object -__arm926_proc_info: - .long 0x41069260 @ ARM926EJ-S (v5TEJ) - .long 0xff0ffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __arm926_setup, __arm926_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA - .long cpu_arm926_name - .long arm926_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm926_cache_fns - .size __arm926_proc_info, . - __arm926_proc_info diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S deleted file mode 100644 index 1c26d991386d7d6f92abd9c3c6c77dcb89ab7392..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm940.S +++ /dev/null @@ -1,360 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/arm940.S: utility functions for ARM940T - * - * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) - */ -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ -#define CACHE_DLINESIZE 16 -#define CACHE_DSEGMENTS 4 -#define CACHE_DENTRIES 64 - - .text -/* - * cpu_arm940_proc_init() - * cpu_arm940_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm940_proc_init) -ENTRY(cpu_arm940_switch_mm) - ret lr - -/* - * cpu_arm940_proc_fin() - */ -ENTRY(cpu_arm940_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x00001000 @ i-cache - bic r0, r0, #0x00000004 @ d-cache - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm940_reset(loc) - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm940_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ flush I cache - mcr p15, 0, ip, c7, c6, 0 @ flush D cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x00000005 @ .............c.p - bic ip, ip, #0x00001000 @ i-cache - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm940_reset) - .popsection - -/* - * cpu_arm940_do_idle() - */ - .align 5 -ENTRY(cpu_arm940_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. 
- */ -ENTRY(arm940_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm940_flush_icache_all) - -/* - * flush_user_cache_all() - */ -ENTRY(arm940_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(arm940_flush_kern_cache_all) - mov r2, #VM_EXEC - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * There is no efficient way to flush a range of cache entries - * in the specified address range. Thus, flushes all. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ -ENTRY(arm940_flush_user_cache_range) - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ flush D cache -#else - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_coherent_user_range) - /* FALLTHROUGH */ - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(arm940_flush_kern_dcache_area) - mov r0, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * There is no efficient way to invalidate a specified virtual - * address range. Thus, invalidates all. - * - * - start - virtual start address - * - end - virtual end address - */ -arm940_dma_inv_range: - mov ip, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c6, 2 @ flush D entry - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * There is no efficient way to clean a specified virtual - * address range. Thus, cleans all. 
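Because the ARM940T cannot clean an arbitrary virtual range, every routine above falls back to walking the whole D-cache by set/way. A small C model of that walk; cp15_clean_inv_dindex() is a hypothetical stand-in for the c7, c14, 2 index-based operation, and the bit positions come straight from the asm.

#include <stdint.h>

/* Geometry from proc-arm940.S: 4KB D-cache, 4 segments of 64 16-byte lines */
#define CACHE_DSEGMENTS 4u
#define CACHE_DENTRIES  64u

static void cp15_clean_inv_dindex(uint32_t val) { (void)val; }

/* The index register packs the segment into bits [5:4] and the
 * entry into bits [31:26], exactly as the nested asm loops do. */
static void clean_inv_whole_dcache(void)
{
    for (uint32_t seg = CACHE_DSEGMENTS; seg-- > 0; )        /* segments 3 to 0 */
        for (uint32_t entry = CACHE_DENTRIES; entry-- > 0; ) /* entries 63 to 0 */
            cp15_clean_inv_dindex((entry << 26) | (seg << 4));
}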
- * - * - start - virtual start address - * - end - virtual end address - */ -arm940_dma_clean_range: -ENTRY(cpu_arm940_dcache_clean_area) - mov ip, #0 -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: mcr p15, 0, r3, c7, c10, 2 @ clean D entry - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 -#endif - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * There is no efficient way to clean and invalidate a specified - * virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm940_dma_flush_range) - mov ip, #0 - mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries -2: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r3, c7, c14, 2 @ clean/flush D entry -#else - mcr p15, 0, r3, c7, c6, 2 @ invalidate D entry -#endif - subs r3, r3, #1 << 26 - bcs 2b @ entries 63 to 0 - subs r1, r1, #1 << 4 - bcs 1b @ segments 3 to 0 - mcr p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm940_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm940_dma_clean_range - bcs arm940_dma_inv_range - b arm940_dma_flush_range -ENDPROC(arm940_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm940_dma_unmap_area) - ret lr -ENDPROC(arm940_dma_unmap_area) - - .globl arm940_flush_kern_cache_louis - .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm940 - - .type __arm940_setup, #function -__arm940_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c6, c3, 0 @ disable data area 3~7 - mcr p15, 0, r0, c6, c4, 0 - mcr p15, 0, r0, c6, c5, 0 - mcr p15, 0, r0, c6, c6, 0 - mcr p15, 0, r0, c6, c7, 0 - - mcr p15, 0, r0, c6, c3, 1 @ disable instruction area 3~7 - mcr p15, 0, r0, c6, c4, 1 - mcr p15, 0, r0, c6, c5, 1 - mcr p15, 0, r0, c6, c6, 1 - mcr p15, 0, r0, c6, c7, 1 - - mov r0, #0x0000003F @ base = 0, size = 4GB - mcr p15, 0, r0, c6, c0, 0 @ set area 0, default - mcr p15, 0, r0, c6, c0, 1 - - ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r7, =CONFIG_DRAM_SIZE >> 12 @ size of RAM (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c1, 0 @ set area 1, RAM - mcr p15, 0, r3, c6, c1, 1 - - ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c2, 0 @ set area 2, ROM/FLASH - mcr p15, 0, r3, c6, c2, 1 - - mov r0, #0x06 - mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable - mcr p15, 0, r0, c2, c0, 1 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #0x00 @ disable whole write buffer -#else - mov r0, #0x02 @ Region 1 write buffered -#endif - mcr p15, 0, r0, c3, c0, 0 - - mov r0, #0x10000 - sub r0, r0, #1 @ r0 = 0xffff - mcr p15, 0, r0, c5, c0, 0 @ all read/write access - mcr p15, 0, r0, c5, c0, 1 - - mrc p15, 0, r0, c1, c0 @ get control register - orr r0, r0, #0x00001000 @ I-cache - orr r0, r0, 
#0x00000005 @ MPU/D-cache - - ret lr - - .size __arm940_setup, . - __arm940_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm940_name, "ARM940T" - - .align - - .section ".proc.info.init", #alloc - - .type __arm940_proc_info,#object -__arm940_proc_info: - .long 0x41009400 - .long 0xff00fff0 - .long 0 - initfn __arm940_setup, __arm940_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm940_name - .long arm940_processor_functions - .long 0 - .long 0 - .long arm940_cache_fns - .size __arm940_proc_info, . - __arm940_proc_info - diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S deleted file mode 100644 index 2dc1c75a4fd4a8131de6db7a77be2ed227053e0e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm946.S +++ /dev/null @@ -1,415 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S - * - * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) - * - * (Many of cache codes are from proc-arm926.S) - */ -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, - * comprising 256 lines of 32 bytes (8 words). - */ -#define CACHE_DSIZE (CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */ -#define CACHE_DLINESIZE 32 /* fixed */ -#define CACHE_DSEGMENTS 4 /* fixed */ -#define CACHE_DENTRIES (CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE) -#define CACHE_DLIMIT (CACHE_DSIZE * 4) /* benchmark needed */ - - .text -/* - * cpu_arm946_proc_init() - * cpu_arm946_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm946_proc_init) -ENTRY(cpu_arm946_switch_mm) - ret lr - -/* - * cpu_arm946_proc_fin() - */ -ENTRY(cpu_arm946_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x00001000 @ i-cache - bic r0, r0, #0x00000004 @ d-cache - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_arm946_reset(loc) - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm946_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ flush I cache - mcr p15, 0, ip, c7, c6, 0 @ flush D cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x00000005 @ .............c.p - bic ip, ip, #0x00001000 @ i-cache - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_arm946_reset) - .popsection - -/* - * cpu_arm946_do_idle() - */ - .align 5 -ENTRY(cpu_arm946_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(arm946_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(arm946_flush_icache_all) - -/* - * flush_user_cache_all() - */ -ENTRY(arm946_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
- */ -ENTRY(arm946_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ flush D cache -#else - mov r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments -1: orr r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries -2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index - subs r3, r3, #1 << 4 - bcs 2b @ entries n to 0 - subs r1, r1, #1 << 29 - bcs 1b @ segments 3 to 0 -#endif - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ flush I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - * (same as arm926) - */ -ENTRY(arm946_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#else - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE -#endif - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(arm946_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * (same as arm926) - */ -ENTRY(arm946_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - * (same as arm926) - */ -ENTRY(arm946_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
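The unaligned-edge handling that the comment above demands is easy to miss in the asm, so here is a C model of it; the cp15_* helpers are hypothetical stand-ins for the mcr instructions in arm946_dma_inv_range below.

#include <stdint.h>

#define CACHE_DLINESIZE 32u  /* fixed on ARM946E-S */

static void cp15_clean_dline(uintptr_t mva) { (void)mva; }
static void cp15_inv_dline(uintptr_t mva)   { (void)mva; }
static void cp15_drain_wb(void)             { }

/*
 * Invalidate [start, end) before a device writes into it. A boundary
 * line that is only partly inside the buffer also holds neighbouring
 * data, so it is cleaned first; invalidating it outright would throw
 * away dirty bytes that do not belong to the buffer.
 */
static void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHE_DLINESIZE - 1))   /* tst r0, #CACHE_DLINESIZE - 1 */
        cp15_clean_dline(start);
    if (end & (CACHE_DLINESIZE - 1))     /* tst r1, #CACHE_DLINESIZE - 1 */
        cp15_clean_dline(end);
    for (start &= ~(uintptr_t)(CACHE_DLINESIZE - 1); start < end;
         start += CACHE_DLINESIZE)
        cp15_inv_dline(start);
    cp15_drain_wb();
}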
- * - * - start - virtual start address - * - end - virtual end address - * (same as arm926) - */ -arm946_dma_inv_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -#endif - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -arm946_dma_clean_range: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -ENTRY(arm946_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry -#else - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry -#endif - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm946_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq arm946_dma_clean_range - bcs arm946_dma_inv_range - b arm946_dma_flush_range -ENDPROC(arm946_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(arm946_dma_unmap_area) - ret lr -ENDPROC(arm946_dma_unmap_area) - - .globl arm946_flush_kern_cache_louis - .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions arm946 - -ENTRY(cpu_arm946_dcache_clean_area) -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .type __arm946_setup, #function -__arm946_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c6, c3, 0 @ disable memory region 3~7 - mcr p15, 0, r0, c6, c4, 0 - mcr p15, 0, r0, c6, c5, 0 - mcr p15, 0, r0, c6, c6, 0 - mcr p15, 0, r0, c6, c7, 0 - - mov r0, #0x0000003F @ base = 0, size = 4GB - mcr p15, 0, r0, c6, c0, 0 @ set region 0, default - - ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r7, =CONFIG_DRAM_SIZE @ size of RAM (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c1, 0 - - ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) - pr_val r3, r0, r7, #1 - mcr p15, 0, r3, c6, c2, 0 - - mov r0, #0x06 - mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable - mcr p15, 0, r0, c2, c0, 1 @ region 1,2 i-cacheable -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mov r0, #0x00 @ disable whole write buffer -#else - mov r0, 
#0x02 @ region 1 write buffered -#endif - mcr p15, 0, r0, c3, c0, 0 - -/* - * Access Permission Settings for future permission control by PU. - * - * priv. user - * region 0 (whole) rw -- : b0001 - * region 1 (RAM) rw rw : b0011 - * region 2 (FLASH) rw r- : b0010 - * region 3~7 (none) -- -- : b0000 - */ - mov r0, #0x00000031 - orr r0, r0, #0x00000200 - mcr p15, 0, r0, c5, c0, 2 @ set data access permission - mcr p15, 0, r0, c5, c0, 3 @ set inst. access permission - - mrc p15, 0, r0, c1, c0 @ get control register - orr r0, r0, #0x00001000 @ I-cache - orr r0, r0, #0x00000005 @ MPU/D-cache -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - orr r0, r0, #0x00004000 @ .1.. .... .... .... -#endif - ret lr - - .size __arm946_setup, . - __arm946_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5t" - string cpu_arm946_name, "ARM946E-S" - - .align - - .section ".proc.info.init", #alloc - .type __arm946_proc_info,#object -__arm946_proc_info: - .long 0x41009460 - .long 0xff00fff0 - .long 0 - .long 0 - initfn __arm946_setup, __arm946_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm946_name - .long arm946_processor_functions - .long 0 - .long 0 - .long arm946_cache_fns - .size __arm946_proc_info, . - __arm946_proc_info - diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S deleted file mode 100644 index 913c06e590af516c438a9a20c7d254bae38f1ac7..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-arm9tdmi.S +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI - * - * Copyright (C) 2003-2006 Hyok S. Choi - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - - .text -/* - * cpu_arm9tdmi_proc_init() - * cpu_arm9tdmi_do_idle() - * cpu_arm9tdmi_dcache_clean_area() - * cpu_arm9tdmi_switch_mm() - * - * These are not required. - */ -ENTRY(cpu_arm9tdmi_proc_init) -ENTRY(cpu_arm9tdmi_do_idle) -ENTRY(cpu_arm9tdmi_dcache_clean_area) -ENTRY(cpu_arm9tdmi_switch_mm) - ret lr - -/* - * cpu_arm9tdmi_proc_fin() - */ -ENTRY(cpu_arm9tdmi_proc_fin) - ret lr - -/* - * Function: cpu_arm9tdmi_reset(loc) - * Params : loc(r0) address to jump to - * Purpose : Sets up everything for a reset and jump to the location for soft reset. - */ - .pushsection .idmap.text, "ax" -ENTRY(cpu_arm9tdmi_reset) - ret r0 -ENDPROC(cpu_arm9tdmi_reset) - .popsection - - .type __arm9tdmi_setup, #function -__arm9tdmi_setup: - ret lr - .size __arm9tdmi_setup, . 
- __arm9tdmi_setup - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - - string cpu_arch_name, "armv4t" - string cpu_elf_name, "v4" - string cpu_arm9tdmi_name, "ARM9TDMI" - string cpu_p2001_name, "P2001" - - .align - - .section ".proc.info.init", #alloc - -.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req - .type __\name\()_proc_info, #object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long 0 - .long 0 - initfn __arm9tdmi_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long \cpu_name - .long arm9tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name - arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S deleted file mode 100644 index 8120b6f4dbb83ec2dbff229060018e66e7317562..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-fa526.S +++ /dev/null @@ -1,213 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 - * - * Written by : Luke Lee - * Copyright (C) 2005 Faraday Corp. - * Copyright (C) 2008-2009 Paulius Zaleckas - * - * These are the low level assembler for performing cache and TLB - * functions on the fa526. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define CACHE_DLINESIZE 16 - - .text -/* - * cpu_fa526_proc_init() - */ -ENTRY(cpu_fa526_proc_init) - ret lr - -/* - * cpu_fa526_proc_fin() - */ -ENTRY(cpu_fa526_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - nop - nop - ret lr - -/* - * cpu_fa526_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 4 - .pushsection .idmap.text, "ax" -ENTRY(cpu_fa526_reset) -/* TODO: Use CP8 if possible... */ - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - bic ip, ip, #0x0800 @ BTB off - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - nop - nop - ret r0 -ENDPROC(cpu_fa526_reset) - .popsection - -/* - * cpu_fa526_do_idle() - */ - .align 4 -ENTRY(cpu_fa526_do_idle) - ret lr - - -ENTRY(cpu_fa526_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_fa526_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. 
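As with the other VIVT cores in this file set, the interesting part of fa526's switch_mm is the ordering of the maintenance operations. A C-shaped sketch of that ordering follows; every helper name here is invented as a stand-in for one cp15 write in the routine below, and real code must of course do this in asm.

#include <stdint.h>

static void inv_dcache(void)         { } /* or clean+invalidate when write-back */
static void inv_icache(void)         { }
static void inv_btb(void)            { } /* branch targets belong to the old mm */
static void drain_write_buffer(void) { }
static void prefetch_flush(void)     { }
static void set_ttb(uintptr_t pgd)   { (void)pgd; } /* mcr p15, 0, r0, c2, c0, 0 */
static void inv_utlb(void)           { }

/* Caches, branch predictor and TLB are all tagged with the old
 * address space, so everything is scrubbed and the barriers issued
 * before the new translation base goes live. */
static void switch_mm(uintptr_t new_pgd)
{
    inv_dcache();
    inv_icache();
    inv_btb();
    drain_write_buffer();
    prefetch_flush();
    set_ttb(new_pgd);
    inv_utlb();  /* stale translations are dropped last */
}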
- * - * pgd: new page tables - */ - .align 4 -ENTRY(cpu_fa526_switch_mm) -#ifdef CONFIG_MMU - mov ip, #0 -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache -#else - mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache -#endif - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB -#endif - ret lr - -/* - * cpu_fa526_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 4 -ENTRY(cpu_fa526_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - - .type __fa526_setup, #function -__fa526_setup: - /* On return of this routine, r0 must carry correct flags for CFG register */ - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM - - mov r0, #1 - mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR - - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - - mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client - mcr p15, 0, r0, c3, c0 @ load domain access register - - mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, fa526_cr1_clear - bic r0, r0, r5 - ldr r5, fa526_cr1_set - orr r0, r0, r5 - ret lr - .size __fa526_setup, . - __fa526_setup - - /* - * .RVI ZFRS BLDP WCAM - * ..11 1001 .111 1101 - * - */ - .type fa526_cr1_clear, #object - .type fa526_cr1_set, #object -fa526_cr1_clear: - .word 0x3f3f -fa526_cr1_set: - .word 0x397D - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_fa526_name, "FA526" - - .align - - .section ".proc.info.init", #alloc - - .type __fa526_proc_info,#object -__fa526_proc_info: - .long 0x66015261 - .long 0xff01fff1 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __fa526_setup, __fa526_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF - .long cpu_fa526_name - .long fa526_processor_functions - .long fa_tlb_fns - .long fa_user_fns - .long fa_cache_fns - .size __fa526_proc_info, . 
- __fa526_proc_info diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S deleted file mode 100644 index bb6dc34d42a374298c2b50fccb191b02504b402b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-feroceon.S +++ /dev/null @@ -1,613 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon - * - * Heavily based on proc-arm926.S - * Maintainer: Assaf Hoffman - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be invalidated - * using the single invalidate entry instructions. Anything larger - * than this, and we go for the whole cache. - * - * This value should be chosen such that we choose the cheapest - * alternative. - */ -#define CACHE_DLIMIT 16384 - -/* - * the cache line size of the I and D cache - */ -#define CACHE_DLINESIZE 32 - - .bss - .align 3 -__cache_params_loc: - .space 8 - - .text -__cache_params: - .word __cache_params_loc - -/* - * cpu_feroceon_proc_init() - */ -ENTRY(cpu_feroceon_proc_init) - mrc p15, 0, r0, c0, c0, 1 @ read cache type register - ldr r1, __cache_params - mov r2, #(16 << 5) - tst r0, #(1 << 16) @ get way - mov r0, r0, lsr #18 @ get cache size order - movne r3, #((4 - 1) << 30) @ 4-way - and r0, r0, #0xf - moveq r3, #0 @ 1-way - mov r2, r2, lsl r0 @ actual cache size - movne r2, r2, lsr #2 @ turned into # of sets - sub r2, r2, #(1 << 5) - stmia r1, {r2, r3} - ret lr - -/* - * cpu_feroceon_proc_fin() - */ -ENTRY(cpu_feroceon_proc_fin) -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mov r0, #0 - mcr p15, 1, r0, c15, c9, 0 @ clean L2 - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_feroceon_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_feroceon_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_feroceon_reset) - .popsection - -/* - * cpu_feroceon_do_idle() - * - * Called with IRQs disabled - */ - .align 5 -ENTRY(cpu_feroceon_do_idle) - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(feroceon_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(feroceon_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ - .align 5 -ENTRY(feroceon_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
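cpu_feroceon_proc_init above sizes the D-cache at boot and parks the result in __cache_params_loc for the set/way loops that follow. Here is a C model of that probe; it mirrors the asm bit arithmetic rather than re-deriving the ARMv5 cache type register layout, and the struct name is invented for the sketch.

#include <stdint.h>

struct feroceon_cache_params {
    uint32_t max_set;  /* highest set index, pre-shifted by 5 (32-byte lines) */
    uint32_t max_way;  /* highest way index, pre-shifted into bits [31:30]   */
};

static struct feroceon_cache_params decode_dcache(uint32_t cache_type_reg)
{
    struct feroceon_cache_params p;
    /* mov r2, #(16 << 5); mov r0, r0, lsr #18; and r0, r0, #0xf; lsl */
    uint32_t size = (16u << 5) << ((cache_type_reg >> 18) & 0xf);

    if (cache_type_reg & (1u << 16)) {        /* 4-way set associative */
        p.max_way = 3u << 30;                 /* movne r3, #((4 - 1) << 30) */
        p.max_set = (size >> 2) - (1u << 5);  /* sets = size / 4 ways / 32B */
    } else {                                  /* direct mapped */
        p.max_way = 0;
        p.max_set = size - (1u << 5);
    }
    return p;
}

__flush_whole_cache below then ORs max_set and max_way together and counts both fields down to zero, one clean+invalidate per set/way pair.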
- */ -ENTRY(feroceon_flush_kern_cache_all) - mov r2, #VM_EXEC - -__flush_whole_cache: - ldr r1, __cache_params - ldmia r1, {r1, r3} -1: orr ip, r1, r3 -2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way - subs ip, ip, #(1 << 30) @ next way - bcs 2b - subs r1, r1, #(1 << 5) @ next set - bcs 1b - - tst r2, #VM_EXEC - mov ip, #0 - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - */ - .align 5 -ENTRY(feroceon_flush_user_cache_range) - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mov ip, #0 - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ - .align 5 -ENTRY(feroceon_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(feroceon_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ - .align 5 -ENTRY(feroceon_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -ENTRY(feroceon_range_flush_kern_dcache_area) - mrs r2, cpsr - add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start - mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top - msr cpsr_c, r2 @ restore interrupts - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ - .align 5 -feroceon_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - bic r0, r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -feroceon_range_dma_inv_range: - mrs r2, cpsr - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c14, 0 @ D inv range start - mcr p15, 5, r1, c15, c14, 1 @ D inv range top - msr cpsr_c, r2 @ restore interrupts - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ - .align 5 -feroceon_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -feroceon_range_dma_clean_range: - mrs r2, cpsr - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c13, 0 @ D clean range start - mcr p15, 5, r1, c15, c13, 1 @ D clean range top - msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
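The feroceon_range_* variants above and below trade the per-line loop for a single start/top hardware range operation. A hedged C sketch of the calling convention; the cp15_range_* and irq_* helpers are hypothetical stand-ins for the p15, 5 opcodes and the cpsr save/restore in the asm.

#include <stdint.h>

static void cp15_range_start(uintptr_t mva) { (void)mva; } /* mcr p15, 5, Rd, c15, c15, 0 */
static void cp15_range_top(uintptr_t mva)   { (void)mva; } /* mcr p15, 5, Rd, c15, c15, 1 */
static void cp15_drain_wb(void)             { }
static unsigned long irq_save(void)         { return 0; }
static void irq_restore(unsigned long f)    { (void)f; }

/*
 * Clean and invalidate [start, end) in one hardware range operation.
 * The top register takes an inclusive address, hence end - 1, and
 * the start/top pair must not be split by an interrupt handler that
 * could itself issue a range operation.
 */
static void range_dma_flush(uintptr_t start, uintptr_t end)
{
    unsigned long flags = irq_save();  /* orr r3, r2, #PSR_I_BIT; msr cpsr_c, r3 */
    cp15_range_start(start);
    cp15_range_top(end - 1);           /* subne r1, r1, #1 in the asm */
    irq_restore(flags);
    cp15_drain_wb();
}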
- * - * - start - virtual start address - * - end - virtual end address - */ - .align 5 -ENTRY(feroceon_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - - .align 5 -ENTRY(feroceon_range_dma_flush_range) - mrs r2, cpsr - cmp r1, r0 - subne r1, r1, #1 @ top address is inclusive - orr r3, r2, #PSR_I_BIT - msr cpsr_c, r3 @ disable interrupts - mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start - mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top - msr cpsr_c, r2 @ restore interrupts - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq feroceon_dma_clean_range - bcs feroceon_dma_inv_range - b feroceon_dma_flush_range -ENDPROC(feroceon_dma_map_area) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_range_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq feroceon_range_dma_clean_range - bcs feroceon_range_dma_inv_range - b feroceon_range_dma_flush_range -ENDPROC(feroceon_range_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(feroceon_dma_unmap_area) - ret lr -ENDPROC(feroceon_dma_unmap_area) - - .globl feroceon_flush_kern_cache_louis - .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions feroceon - -.macro range_alias basename - .globl feroceon_range_\basename - .type feroceon_range_\basename , %function - .equ feroceon_range_\basename , feroceon_\basename -.endm - -/* - * Most of the cache functions are unchanged for this case. - * Export suitable alias symbols for the unchanged functions: - */ - range_alias flush_icache_all - range_alias flush_user_cache_all - range_alias flush_kern_cache_all - range_alias flush_kern_cache_louis - range_alias flush_user_cache_range - range_alias coherent_kern_range - range_alias coherent_user_range - range_alias dma_unmap_area - - define_cache_functions feroceon_range - - .align 5 -ENTRY(cpu_feroceon_dcache_clean_area) -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mov r2, r0 - mov r3, r1 -#endif -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) -1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry - add r2, r2, #CACHE_DLINESIZE - subs r3, r3, #CACHE_DLINESIZE - bhi 1b -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_feroceon_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_feroceon_switch_mm) -#ifdef CONFIG_MMU - /* - * Note: we wish to call __flush_whole_cache but we need to preserve - * lr to do so. 
The only way without touching main memory is to - * use r2 which is normally used to test the VM_EXEC flag, and - * compensate locally for the skipped ops if it is not set. - */ - mov r2, lr @ abuse r2 to preserve lr - bl __flush_whole_cache - @ if r2 contains the VM_EXEC bit then the next 2 ops are done already - tst r2, #VM_EXEC - mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcreq p15, 0, ip, c7, c10, 4 @ drain WB - - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret r2 -#else - ret lr -#endif - -/* - * cpu_feroceon_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_feroceon_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry -#if defined(CONFIG_CACHE_FEROCEON_L2) && \ - !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) - mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry -#endif - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ -.globl cpu_feroceon_suspend_size -.equ cpu_feroceon_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_feroceon_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c13, c0, 0 @ PID - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c1, c0, 0 @ Control register - stmia r0, {r4 - r6} - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_feroceon_do_suspend) - -ENTRY(cpu_feroceon_do_resume) - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches - ldmia r0, {r4 - r6} - mcr p15, 0, r4, c13, c0, 0 @ PID - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - mcr p15, 0, r1, c2, c0, 0 @ TTB address - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_feroceon_do_resume) -#endif - - .type __feroceon_setup, #function -__feroceon_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, feroceon_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __feroceon_setup, . 
- __feroceon_setup - - /* - * B - * R P - * .RVI UFRS BLDP WCAM - * .011 .001 ..11 0101 - * - */ - .type feroceon_crval, #object -feroceon_crval: - crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_feroceon_name, "Feroceon" - string cpu_88fr531_name, "Feroceon 88FR531-vd" - string cpu_88fr571_name, "Feroceon 88FR571-vd" - string cpu_88fr131_name, "Feroceon 88FR131" - - .align - - .section ".proc.info.init", #alloc - -.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __feroceon_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long \cpu_name - .long feroceon_processor_functions - .long v4wbi_tlb_fns - .long feroceon_user_fns - .long \cache - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - -#ifdef CONFIG_CPU_FEROCEON_OLD_ID - feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ - cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns -#endif - - feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ - cache=feroceon_cache_fns - feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ - cache=feroceon_range_cache_fns - feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ - cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S deleted file mode 100644 index 60ac7c5999a98eb3046d32e17299cc42dfd980ec..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-macros.S +++ /dev/null @@ -1,388 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * We need constants.h for: - * VMA_VM_MM - * VMA_VM_FLAGS - * VM_EXEC - */ -#include -#include -#include - -#ifdef CONFIG_CPU_V7M -#include -#endif - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * vma_vm_flags - get vma->vm_flags - */ - .macro vma_vm_flags, rd, rn - ldr \rd, [\rn, #VMA_VM_FLAGS] - .endm - -/* - * act_mm - get current->active_mm - */ - .macro act_mm, rd - bic \rd, sp, #(THREAD_SIZE - 1) & ~63 - bic \rd, \rd, #63 - ldr \rd, [\rd, #TI_TASK] - .if (TSK_ACTIVE_MM > IMM12_MASK) - add \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK - .endif - ldr \rd, [\rd, #TSK_ACTIVE_MM & IMM12_MASK] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - * note, this field is 64bit, so in big-endian the two words are swapped too. - */ - .macro mmid, rd, rn -#ifdef __ARMEB__ - ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ] -#else - ldr \rd, [\rn, #MM_CONTEXT_ID] -#endif - .endm - -/* - * mask_asid - mask the ASID from the context ID - */ - .macro asid, rd, rn - and \rd, \rn, #255 - .endm - - .macro crval, clear, mmuset, ucset -#ifdef CONFIG_MMU - .word \clear - .word \mmuset -#else - .word \clear - .word \ucset -#endif - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register - * on ARMv7. 
- */ - .macro dcache_line_size, reg, tmp -#ifdef CONFIG_CPU_V7M - movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR - movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR - ldr \tmp, [\tmp] -#else - mrc p15, 0, \tmp, c0, c0, 1 @ read ctr -#endif - lsr \tmp, \tmp, #16 - and \tmp, \tmp, #0xf @ cache line size encoding - mov \reg, #4 @ bytes per word - mov \reg, \reg, lsl \tmp @ actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register - * on ARMv7. - */ - .macro icache_line_size, reg, tmp -#ifdef CONFIG_CPU_V7M - movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR - movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR - ldr \tmp, [\tmp] -#else - mrc p15, 0, \tmp, c0, c0, 1 @ read ctr -#endif - and \tmp, \tmp, #0xf @ cache line size encoding - mov \reg, #4 @ bytes per word - mov \reg, \reg, lsl \tmp @ actual cache line size - .endm - -/* - * Sanity check the PTE configuration for the code below - which makes - * certain assumptions about how these bits are laid out. - */ -#ifdef CONFIG_MMU -#if L_PTE_SHARED != PTE_EXT_SHARED -#error PTE shared bit mismatch -#endif -#if !defined (CONFIG_ARM_LPAE) && \ - (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ - L_PTE_PRESENT) > L_PTE_SHARED -#error Invalid Linux PTE bit settings -#endif -#endif /* CONFIG_MMU */ - -/* - * The ARMv6 and ARMv7 set_pte_ext translation function. - * - * Permission translation: - * YUWD APX AP1 AP0 SVC User - * 0xxx 0 0 0 no acc no acc - * 100x 1 0 1 r/o no acc - * 10x0 1 0 1 r/o no acc - * 1011 0 0 1 r/w no acc - * 110x 1 1 1 r/o r/o - * 11x0 1 1 1 r/o r/o - * 1111 0 1 1 r/w r/w - */ - .macro armv6_mt_table pfx -\pfx\()_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE - .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long 0x00 @ L_PTE_MT_MINICACHE (not present) - .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC - .long 0x00 @ unused - .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS - .endm - - .macro armv6_set_pte_ext pfx - str r1, [r0], #2048 @ linux version - - bic r3, r1, #0x000003fc - bic r3, r3, #PTE_TYPE_MASK - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - adr ip, \pfx\()_mt_table - and r2, r1, #L_PTE_MT_MASK - ldr r2, [ip, r2] - - eor r1, r1, #L_PTE_DIRTY - tst r1, #L_PTE_DIRTY|L_PTE_RDONLY - orrne r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - tstne r3, #PTE_EXT_APX - - @ user read-only -> kernel read-only - bicne r3, r3, #PTE_EXT_AP0 - - tst r1, #L_PTE_XN - orrne r3, r3, #PTE_EXT_XN - - eor r3, r3, r2 - - tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT - moveq r3, #0 - tstne r1, #L_PTE_NONE - movne r3, #0 - - str r3, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte - .endm - - -/* - * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, - * covering most CPUs except Xscale and Xscale 3. 
- * - * Permission translation: - * YUWD AP SVC User - * 0xxx 0x00 no acc no acc - * 100x 0x00 r/o no acc - * 10x0 0x00 r/o no acc - * 1011 0x55 r/w no acc - * 110x 0xaa r/w r/o - * 11x0 0xaa r/w r/o - * 1111 0xff r/w r/w - */ - .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #2048 @ linux version - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r3, #L_PTE_USER @ user? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? - movne r2, #0 - - .if \wc_disable -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - tst r2, #PTE_CACHEABLE - bicne r2, r2, #PTE_BUFFERABLE -#endif - .endif - str r2, [r0] @ hardware version - .endm - - -/* - * Xscale set_pte_ext translation, split into two halves to cope - * with work-arounds. r3 must be preserved by code between these - * two macros. - * - * Permission translation: - * YUWD AP SVC User - * 0xxx 00 no acc no acc - * 100x 00 r/o no acc - * 10x0 00 r/o no acc - * 1011 01 r/w no acc - * 110x 10 r/w r/o - * 11x0 10 r/w r/o - * 1111 11 r/w r/w - */ - .macro xscale_set_pte_ext_prologue - str r1, [r0] @ linux version - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits - orr r2, r2, #PTE_TYPE_EXT @ extended page - - tst r3, #L_PTE_USER @ user? - orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - - tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? - orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w - @ combined with user -> user r/w - .endm - - .macro xscale_set_pte_ext_epilogue - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? - movne r2, #0 @ no -> fault - - str r2, [r0, #2048]! @ hardware version - mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - .endm - -.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 -/* - * If we are building for big.Little with branch predictor hardening, - * we need the processor function tables to remain available after boot. - */ -#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) - .section ".rodata" -#endif - .type \name\()_processor_functions, #object - .align 2 -ENTRY(\name\()_processor_functions) - .word \dabort - .word \pabort - .word cpu_\name\()_proc_init - .word \bugs - .word cpu_\name\()_proc_fin - .word cpu_\name\()_reset - .word cpu_\name\()_do_idle - .word cpu_\name\()_dcache_clean_area - .word cpu_\name\()_switch_mm - - .if \nommu - .word 0 - .else - .word cpu_\name\()_set_pte_ext - .endif - - .if \suspend - .word cpu_\name\()_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - .word cpu_\name\()_do_suspend - .word cpu_\name\()_do_resume -#else - .word 0 - .word 0 -#endif - .else - .word 0 - .word 0 - .word 0 - .endif - - .size \name\()_processor_functions, . 
- \name\()_processor_functions -#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) - .previous -#endif -.endm - -.macro define_cache_functions name:req - .align 2 - .type \name\()_cache_fns, #object -ENTRY(\name\()_cache_fns) - .long \name\()_flush_icache_all - .long \name\()_flush_kern_cache_all - .long \name\()_flush_kern_cache_louis - .long \name\()_flush_user_cache_all - .long \name\()_flush_user_cache_range - .long \name\()_coherent_kern_range - .long \name\()_coherent_user_range - .long \name\()_flush_kern_dcache_area - .long \name\()_dma_map_area - .long \name\()_dma_unmap_area - .long \name\()_dma_flush_range - .size \name\()_cache_fns, . - \name\()_cache_fns -.endm - -.macro define_tlb_functions name:req, flags_up:req, flags_smp - .type \name\()_tlb_fns, #object -ENTRY(\name\()_tlb_fns) - .long \name\()_flush_user_tlb_range - .long \name\()_flush_kern_tlb_range - .ifnb \flags_smp - ALT_SMP(.long \flags_smp ) - ALT_UP(.long \flags_up ) - .else - .long \flags_up - .endif - .size \name\()_tlb_fns, . - \name\()_tlb_fns -.endm - -.macro globl_equ x, y - .globl \x - .equ \x, \y -.endm - -.macro initfn, func, base - .long \func - \base -.endm - - /* - * Macro to calculate the log2 size for the protection region - * registers. This calculates rd = log2(size) - 1. tmp must - * not be the same register as rd. - */ -.macro pr_sz, rd, size, tmp - mov \tmp, \size, lsr #12 - mov \rd, #11 -1: movs \tmp, \tmp, lsr #1 - addne \rd, \rd, #1 - bne 1b -.endm - - /* - * Macro to generate a protection region register value - * given a pre-masked address, size, and enable bit. - * Corrupts size. - */ -.macro pr_val, dest, addr, size, enable - pr_sz \dest, \size, \size @ calculate log2(size) - 1 - orr \dest, \addr, \dest, lsl #1 @ mask in the region size - orr \dest, \dest, \enable -.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S deleted file mode 100644 index f083085788857b8999a00c62025f240cb74b0a66..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-mohawk.S +++ /dev/null @@ -1,444 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core - * - * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. - * - * Heavily based on proc-arm926.S and proc-xsc3.S - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. If the - * area is larger than this, then we flush the whole cache. - */ -#define CACHE_DLIMIT 32768 - -/* - * The cache line size of the L1 D cache. - */ -#define CACHE_DLINESIZE 32 - -/* - * cpu_mohawk_proc_init() - */ -ENTRY(cpu_mohawk_proc_init) - ret lr - -/* - * cpu_mohawk_proc_fin() - */ -ENTRY(cpu_mohawk_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...iz........... - bic r0, r0, #0x0006 @ .............ca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_mohawk_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. 
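A note on the CACHE_DLIMIT cutoff defined above: per-line maintenance costs one coprocessor operation per 32-byte line, so once a range grows past 32 KiB it is cheaper to clean and invalidate the whole D-cache — exactly the "cmp r3, #CACHE_DLIMIT / bgt __flush_whole_cache" test in the range-flush entry below. A C sketch of the decision, with the two primitives as hypothetical stand-ins:

```c
#include <stdint.h>

#define CACHE_DLIMIT    32768u  /* past this, whole-cache ops win */
#define CACHE_DLINESIZE 32u

void clean_inv_dcache_line(uintptr_t va);  /* one 32-byte line */
void clean_inv_whole_dcache(void);         /* entire D-cache   */

void flush_user_range(uintptr_t start, uintptr_t end)
{
    if (end - start > CACHE_DLIMIT) {  /* large range: give up on lines */
        clean_inv_whole_dcache();
        return;
    }
    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE)
        clean_inv_dcache_line(va);
}
```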
- * - * loc: location to jump to for soft reset - * - * (same as arm926) - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_mohawk_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x0007 @ .............cam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_mohawk_reset) - .popsection - -/* - * cpu_mohawk_do_idle() - * - * Called with IRQs disabled - */ - .align 5 -ENTRY(cpu_mohawk_do_idle) - mov r0, #0 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt - ret lr - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(mohawk_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(mohawk_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Clean and invalidate all cache entries in a particular - * address space. - */ -ENTRY(mohawk_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(mohawk_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer - ret lr - -/* - * flush_user_cache_range(start, end, flags) - * - * Clean and invalidate a range of cache entries in the - * specified address range. - * - * - start - start address (inclusive) - * - end - end address (exclusive) - * - flags - vm_flags describing address space - * - * (same as arm926) - */ -ENTRY(mohawk_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #CACHE_DLIMIT - bgt __flush_whole_cache -1: tst r2, #VM_EXEC - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry - mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c10, 4 @ drain WB - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(mohawk_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start, end. If you have non-snooping - * Harvard caches, you need to implement this function. 
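The coherent_kern_range/coherent_user_range contract spelled out above — make freshly written instructions visible to a non-snooping Harvard I-cache — reduces to a short per-line sequence: clean the D line, invalidate the matching I line, then drain the write buffer. A C rendering of the loop, with hypothetical per-line helpers:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 32u

void clean_dcache_line(uintptr_t va);  /* push D-side data to memory */
void inv_icache_line(uintptr_t va);    /* drop the stale I-side copy */
void drain_write_buffer(void);

/* After writing code (module load, signal trampoline), walk the range
 * line by line so the I-side refetch sees the new bytes. */
void coherent_range(uintptr_t start, uintptr_t end)
{
    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE) {
        clean_dcache_line(va);
        inv_icache_line(va);
    }
    drain_write_buffer();
}
```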
- * - * - start - virtual start address - * - end - virtual end address - * - * (same as arm926) - */ -ENTRY(mohawk_coherent_user_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov r0, #0 - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(mohawk_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -mohawk_dma_inv_range: - tst r0, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHE_DLINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - * - * (same as v4wb) - */ -mohawk_dma_clean_range: - bic r0, r0, #CACHE_DLINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(mohawk_dma_flush_range) - bic r0, r0, #CACHE_DLINESIZE - 1 -1: - mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry - add r0, r0, #CACHE_DLINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(mohawk_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq mohawk_dma_clean_range - bcs mohawk_dma_inv_range - b mohawk_dma_flush_range -ENDPROC(mohawk_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(mohawk_dma_unmap_area) - ret lr -ENDPROC(mohawk_dma_unmap_area) - - .globl mohawk_flush_kern_cache_louis - .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions mohawk - -ENTRY(cpu_mohawk_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHE_DLINESIZE - subs r1, r1, #CACHE_DLINESIZE - bhi 1b - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr - -/* - * cpu_mohawk_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. 
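One subtlety in mohawk_dma_inv_range above deserves a callout: a cache line only partially covered by the range may hold unrelated dirty data, so the two "tst ... mcrne" pairs clean the unaligned head and tail lines before the loop discards whole lines. In C, under the same hypothetical helpers as the sketches above:

```c
#include <stdint.h>

#define CACHE_DLINESIZE 32u

void clean_dcache_line(uintptr_t va);
void inv_dcache_line(uintptr_t va);
void drain_write_buffer(void);

void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHE_DLINESIZE - 1))
        clean_dcache_line(start);  /* partial line at the head */
    if (end & (CACHE_DLINESIZE - 1))
        clean_dcache_line(end);    /* partial line at the tail */

    for (uintptr_t va = start & ~(uintptr_t)(CACHE_DLINESIZE - 1);
         va < end; va += CACHE_DLINESIZE)
        inv_dcache_line(va);       /* whole lines: safe to discard */

    drain_write_buffer();
}
```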
- * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_mohawk_switch_mm) - mov ip, #0 - mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB - orr r0, r0, #0x18 @ cache the page table in L2 - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret lr - -/* - * cpu_mohawk_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_mohawk_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB - ret lr -#endif - -.globl cpu_mohawk_suspend_size -.equ cpu_mohawk_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_mohawk_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_mohawk_do_suspend) - -ENTRY(cpu_mohawk_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer - mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. - mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - orr r1, r1, #0x18 @ cache the page table in L2 - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_mohawk_do_resume) -#endif - - .type __mohawk_setup, #function -__mohawk_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs - orr r4, r4, #0x18 @ cache the page table in L2 - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - - mov r0, #0 @ don't allow CP access - mcr p15, 0, r0, c15, c1, 0 @ write CP access register - - adr r5, mohawk_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - - .size __mohawk_setup, . 
- __mohawk_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * .011 1001 ..00 0101 - * - */ - .type mohawk_crval, #object -mohawk_crval: - crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_mohawk_name, "Marvell 88SV331x" - - .align - - .section ".proc.info.init", #alloc - - .type __88sv331x_proc_info,#object -__88sv331x_proc_info: - .long 0x56158000 @ Marvell 88SV331x (MOHAWK) - .long 0xfffff000 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __mohawk_setup, __88sv331x_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_mohawk_name - .long mohawk_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long mohawk_cache_fns - .size __88sv331x_proc_info, . - __88sv331x_proc_info diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S deleted file mode 100644 index d5bc5d70256399723f29065b0ab09cdb20a32a9b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-sa110.S +++ /dev/null @@ -1,222 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-sa110.S - * - * Copyright (C) 1997-2002 Russell King - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * MMU functions for SA110 - * - * These are the low level assembler for performing cache and TLB - * functions on the StrongARM-110. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * the cache line size of the I and D cache - */ -#define DCACHELINESIZE 32 - - .text - -/* - * cpu_sa110_proc_init() - */ -ENTRY(cpu_sa110_proc_init) - mov r0, #0 - mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - ret lr - -/* - * cpu_sa110_proc_fin() - */ -ENTRY(cpu_sa110_proc_fin) - mov r0, #0 - mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_sa110_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_sa110_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ 
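The crval records (mohawk_crval above, sa110_crval below) feed one computation in every __xxx_setup: read the CP15 control register, clear the clear mask, OR in the set mask, and return the result for head.S to write back. The crval macro in the proc-macros.S hunk earlier selects mmuset or ucset depending on CONFIG_MMU. A C sketch with an assumed register accessor:

```c
#include <stdint.h>

struct crval {
    uint32_t clear;  /* control bits forced to zero    */
    uint32_t set;    /* mmuset (MMU) or ucset (no-MMU) */
};

uint32_t read_cp15_control(void);  /* hypothetical mrc p15 accessor */

/* The "bic r0, r0, r5 / orr r0, r0, r6" step at the end of each
 * __xxx_setup, expressed over a crval record. */
uint32_t setup_control(const struct crval *cv)
{
    uint32_t ctl = read_cp15_control();

    ctl &= ~cv->clear;
    ctl |= cv->set;
    return ctl;  /* head.S writes this back into CP15 c1 */
}
```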
- mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_sa110_reset) - .popsection - -/* - * cpu_sa110_do_idle(type) - * - * Cause the processor to idle - * - * type: call type: - * 0 = slow idle - * 1 = fast idle - * 2 = switch to slow processor clock - * 3 = switch to fast processor clock - */ - .align 5 - -ENTRY(cpu_sa110_do_idle) - mcr p15, 0, ip, c15, c2, 2 @ disable clock switching - ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc - ldr r1, [r1, #0] @ force switch to MCLK - mov r0, r0 @ safety - mov r0, r0 @ safety - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned - mov r0, r0 @ safety - mov r0, r0 @ safety - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - ret lr - -/* ================================= CACHE ================================ */ - -/* - * cpu_sa110_dcache_clean_area(addr,sz) - * - * Clean the specified entry of any caches such that the MMU - * translation fetches will obtain correct data. - * - * addr: cache-unaligned virtual address - */ - .align 5 -ENTRY(cpu_sa110_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #DCACHELINESIZE - subs r1, r1, #DCACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_sa110_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_sa110_switch_mm) -#ifdef CONFIG_MMU - str lr, [sp, #-4]! - bl v4wb_flush_kern_cache_all @ clears IP - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ldr pc, [sp], #4 -#else - ret lr -#endif - -/* - * cpu_sa110_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_sa110_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - - .type __sa110_setup, #function -__sa110_setup: - mov r10, #0 - mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 -#endif - - adr r5, sa110_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __sa110_setup, . - __sa110_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..01 0001 ..11 1101 - * - */ - .type sa110_crval, #object -sa110_crval: - crval clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_sa110_name, "StrongARM-110" - - .align - - .section ".proc.info.init", #alloc - - .type __sa110_proc_info,#object -__sa110_proc_info: - .long 0x4401a100 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __sa110_setup, __sa110_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa110_name - .long sa110_processor_functions - .long v4wb_tlb_fns - .long v4wb_user_fns - .long v4wb_cache_fns - .size __sa110_proc_info, . 
- __sa110_proc_info diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S deleted file mode 100644 index be7b611c76c76ada0a8f5d48737a49e70f4be56e..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-sa1100.S +++ /dev/null @@ -1,270 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-sa1100.S - * - * Copyright (C) 1997-2002 Russell King - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * MMU functions for SA110 - * - * These are the low level assembler for performing cache and TLB - * functions on the StrongARM-1100 and StrongARM-1110. - * - * Note that SA1100 and SA1110 share everything but their name and CPU ID. - * - * 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl): - * Flush the read buffer at context switches - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -/* - * the cache line size of the I and D cache - */ -#define DCACHELINESIZE 32 - - .section .text - -/* - * cpu_sa1100_proc_init() - */ -ENTRY(cpu_sa1100_proc_init) - mov r0, #0 - mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland - ret lr - -/* - * cpu_sa1100_proc_fin() - * - * Prepare the CPU for reset: - * - Disable interrupts - * - Clean and turn off caches. - */ -ENTRY(cpu_sa1100_proc_fin) - mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x000e @ ............wca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_sa1100_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_sa1100_reset) - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches - mcr p15, 0, ip, c7, c10, 4 @ drain WB -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs -#endif - mrc p15, 0, ip, c1, c0, 0 @ ctrl register - bic ip, ip, #0x000f @ ............wcam - bic ip, ip, #0x1100 @ ...i...s........ - mcr p15, 0, ip, c1, c0, 0 @ ctrl register - ret r0 -ENDPROC(cpu_sa1100_reset) - .popsection - -/* - * cpu_sa1100_do_idle(type) - * - * Cause the processor to idle - * - * type: call type: - * 0 = slow idle - * 1 = fast idle - * 2 = switch to slow processor clock - * 3 = switch to fast processor clock - */ - .align 5 -ENTRY(cpu_sa1100_do_idle) - mov r0, r0 @ 4 nop padding - mov r0, r0 - mov r0, r0 - mov r0, r0 @ 4 nop padding - mov r0, r0 - mov r0, r0 - mov r0, #0 - ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address - @ --- aligned to a cache line - mcr p15, 0, r0, c15, c2, 2 @ disable clock switching - ldr r1, [r1, #0] @ force switch to MCLK - mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt - mov r0, r0 @ safety - mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - ret lr - -/* ================================= CACHE ================================ */ - -/* - * cpu_sa1100_dcache_clean_area(addr,sz) - * - * Clean the specified entry of any caches such that the MMU - * translation fetches will obtain correct data. 
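cpu_sa1100_dcache_clean_area, whose body follows, uses the same "1: mcr ... / add / subs / bhi 1b" idiom as its siblings: advance one line, decrement the remaining byte count, and keep looping while it is still positive, so a trailing partial line is still cleaned. As a C sketch:

```c
#include <stddef.h>
#include <stdint.h>

#define DCACHELINESIZE 32u

void clean_dcache_line(uintptr_t va);  /* hypothetical per-line clean */

void dcache_clean_area(uintptr_t addr, size_t size)
{
    long remaining = (long)size;

    /* "subs r1, r1, #DCACHELINESIZE / bhi 1b": loop while bytes remain,
     * so a final partial line is cleaned before the count goes negative. */
    while (remaining > 0) {
        clean_dcache_line(addr);
        addr += DCACHELINESIZE;
        remaining -= DCACHELINESIZE;
    }
}
```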
- * - * addr: cache-unaligned virtual address - */ - .align 5 -ENTRY(cpu_sa1100_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #DCACHELINESIZE - subs r1, r1, #DCACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_sa1100_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_sa1100_switch_mm) -#ifdef CONFIG_MMU - str lr, [sp, #-4]! - bl v4wb_flush_kern_cache_all @ clears IP - mcr p15, 0, ip, c9, c0, 0 @ invalidate RB - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ldr pc, [sp], #4 -#else - ret lr -#endif - -/* - * cpu_sa1100_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ - .align 5 -ENTRY(cpu_sa1100_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 - mov r0, r0 - mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c10, 4 @ drain WB -#endif - ret lr - -.globl cpu_sa1100_suspend_size -.equ cpu_sa1100_suspend_size, 4 * 3 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_sa1100_do_suspend) - stmfd sp!, {r4 - r6, lr} - mrc p15, 0, r4, c3, c0, 0 @ domain ID - mrc p15, 0, r5, c13, c0, 0 @ PID - mrc p15, 0, r6, c1, c0, 0 @ control reg - stmia r0, {r4 - r6} @ store cp regs - ldmfd sp!, {r4 - r6, pc} -ENDPROC(cpu_sa1100_do_suspend) - -ENTRY(cpu_sa1100_do_resume) - ldmia r0, {r4 - r6} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs - mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache - mcr p15, 0, ip, c9, c0, 0 @ invalidate RB - mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB - - mcr p15, 0, r4, c3, c0, 0 @ domain ID - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r5, c13, c0, 0 @ PID - mov r0, r6 @ control register - b cpu_resume_mmu -ENDPROC(cpu_sa1100_do_resume) -#endif - - .type __sa1100_setup, #function -__sa1100_setup: - mov r0, #0 - mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 -#endif - adr r5, sa1100_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0 @ get control register v4 - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __sa1100_setup, . 
- __sa1100_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 0001 ..11 1101 - * - */ - .type sa1100_crval, #object -sa1100_crval: - crval clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130 - - __INITDATA - -/* - * SA1100 and SA1110 share the same function calls - */ - - @ define struct processor (see and proc-macros.S) - define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv4" - string cpu_elf_name, "v4" - string cpu_sa1100_name, "StrongARM-1100" - string cpu_sa1110_name, "StrongARM-1110" - - .align - - .section ".proc.info.init", #alloc - -.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __sa1100_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long \cpu_name - .long sa1100_processor_functions - .long v4wb_tlb_fns - .long v4_mc_user_fns - .long v4wb_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name - sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S deleted file mode 100644 index c1c85eb3484f319d853b854b96a272cd6f3c6e06..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v6.S +++ /dev/null @@ -1,297 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v6.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Modified by Catalin Marinas for noMMU support - * - * This is the "shell" of the ARMv6 processor support. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#define D_CACHE_LINE_SIZE 32 - -#define TTB_C (1 << 0) -#define TTB_S (1 << 1) -#define TTB_IMP (1 << 2) -#define TTB_RGN_NC (0 << 3) -#define TTB_RGN_WBWA (1 << 3) -#define TTB_RGN_WT (2 << 3) -#define TTB_RGN_WB (3 << 3) - -#define TTB_FLAGS_UP TTB_RGN_WBWA -#define PMD_FLAGS_UP PMD_SECT_WB -#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S -#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S - -ENTRY(cpu_v6_proc_init) - ret lr - -ENTRY(cpu_v6_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x0006 @ .............ca. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_v6_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_v6_reset) - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x1 @ ...............m - mcr p15, 0, r1, c1, c0, 0 @ disable MMU - mov r1, #0 - mcr p15, 0, r1, c7, c5, 4 @ ISB - ret r0 -ENDPROC(cpu_v6_reset) - .popsection - -/* - * cpu_v6_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. 
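The TTB_FLAGS_UP/TTB_FLAGS_SMP values defined above exploit the fact that the pgd base is well aligned, so the low TTBR0 bits are free to carry page-table-walk attributes; cpu_v6_switch_mm below ORs them in before writing the register. A sketch of the composition, with bit names following the defines above:

```c
#include <stdint.h>

#define TTB_C        (1u << 0)  /* inner-cacheable table walks      */
#define TTB_S        (1u << 1)  /* shareable                        */
#define TTB_RGN_WBWA (1u << 3)  /* outer write-back, write-allocate */

#define TTB_FLAGS_UP  TTB_RGN_WBWA
#define TTB_FLAGS_SMP (TTB_RGN_WBWA | TTB_S)

/* The pgd base is 16 KiB aligned, so the low TTBR0 bits are free to
 * carry walk attributes - the "orr r0, r0, #TTB_FLAGS_*" step. */
static inline uint32_t ttbr0_value(uint32_t pgd_phys, int smp)
{
    return pgd_phys | (smp ? TTB_FLAGS_SMP : TTB_FLAGS_UP);
}
```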
- */ -ENTRY(cpu_v6_do_idle) - mov r1, #0 - mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode - mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt - ret lr - -ENTRY(cpu_v6_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #D_CACHE_LINE_SIZE - subs r1, r1, #D_CACHE_LINE_SIZE - bhi 1b - ret lr - -/* - * cpu_v6_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys - * - * - pgd_phys - physical address of new TTB - * - * It is assumed that: - * - we are not using split page tables - */ -ENTRY(cpu_v6_switch_mm) -#ifdef CONFIG_MMU - mov r2, #0 - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB - mcr p15, 0, r2, c7, c10, 4 @ drain write buffer - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrc p15, 0, r2, c13, c0, 1 @ read current context ID - bic r2, r2, #0xff @ extract the PID - and r1, r1, #0xff - orr r1, r1, r2 @ insert into new context ID -#endif - mcr p15, 0, r1, c13, c0, 1 @ set context ID -#endif - ret lr - -/* - * cpu_v6_set_pte_ext(ptep, pte, ext) - * - * Set a level 2 translation table entry. - * - * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) - * - pte - PTE value to store - * - ext - value for extended PTE bits - */ - armv6_mt_table cpu_v6 - -ENTRY(cpu_v6_set_pte_ext) -#ifdef CONFIG_MMU - armv6_set_pte_ext cpu_v6 -#endif - ret lr - -/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ -.globl cpu_v6_suspend_size -.equ cpu_v6_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v6_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID -#ifdef CONFIG_MMU - mrc p15, 0, r5, c3, c0, 0 @ Domain ID - mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 -#endif - mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register - mrc p15, 0, r8, c1, c0, 2 @ co-processor access control - mrc p15, 0, r9, c1, c0, 0 @ control register - stmia r0, {r4 - r9} - ldmfd sp!, {r4- r9, pc} -ENDPROC(cpu_v6_do_suspend) - -ENTRY(cpu_v6_do_resume) - mov ip, #0 - mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate cache - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID - ldmia r0, {r4 - r9} - mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID -#ifdef CONFIG_MMU - mcr p15, 0, r5, c3, c0, 0 @ Domain ID - ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) - ALT_UP(orr r1, r1, #TTB_FLAGS_UP) - mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 - mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 - mcr p15, 0, ip, c2, c0, 2 @ TTB control register -#endif - mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register - mcr p15, 0, r8, c1, c0, 2 @ co-processor access control - mcr p15, 0, ip, c7, c5, 4 @ ISB - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_v6_do_resume) -#endif - - string cpu_v6_name, "ARMv6-compatible processor" - - .align - -/* - * __v6_setup - * - * Initialise TLB, Caches, and MMU state ready to switch the MMU - * on. Return in r0 the new CP15 C1 control register setting. - * - * We automatically detect if we have a Harvard cache, and use the - * Harvard cache control instructions insead of the unified cache - * control instructions. - * - * This should be able to cover all ARMv6 cores. 
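The CONFIG_PID_IN_CONTEXTIDR block in cpu_v6_switch_mm above is easy to misread: CONTEXTIDR holds a trace-visible PROCID in bits [31:8] and the ASID in bits [7:0], and the bic/and/orr triplet keeps the live PROCID while swapping in only the new ASID. In C:

```c
#include <stdint.h>

#define ASID_MASK 0xffu  /* CONTEXTIDR[7:0]; PROCID sits in [31:8] */

/* The "bic r2, r2, #0xff / and r1, r1, #0xff / orr r1, r1, r2"
 * triplet: keep the live PROCID, replace only the ASID. */
static inline uint32_t next_contextidr(uint32_t live, uint32_t new_ctx)
{
    return (live & ~ASID_MASK) | (new_ctx & ASID_MASK);
}
```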
- * - * It is assumed that: - * - cache type register is implemented - */ -__v6_setup: -#ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode - ALT_UP(nop) - orr r0, r0, #0x20 - ALT_SMP(mcr p15, 0, r0, c1, c0, 1) - ALT_UP(nop) -#endif - - mov r0, #0 - mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache -#ifdef CONFIG_MMU - mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs - mcr p15, 0, r0, c2, c0, 2 @ TTB control register - ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) - ALT_UP(orr r4, r4, #TTB_FLAGS_UP) - ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) - ALT_UP(orr r8, r8, #TTB_FLAGS_UP) - mcr p15, 0, r8, c2, c0, 1 @ load TTB1 -#endif /* CONFIG_MMU */ - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer and - @ complete invalidations - adr r5, v6_crval - ldmia r5, {r5, r6} - ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables - mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r5 @ clear bits them - orr r0, r0, r6 @ set them -#ifdef CONFIG_ARM_ERRATA_364296 - /* - * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data - * corruption with hit-under-miss enabled). The conditional code below - * (setting the undocumented bit 31 in the auxiliary control register - * and the FI bit in the control register) disables hit-under-miss - * without putting the processor into full low interrupt latency mode. - */ - ldr r6, =0x4107b362 @ id for ARM1136 r0p2 - mrc p15, 0, r5, c0, c0, 0 @ get processor id - teq r5, r6 @ check for the faulty core - mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg - orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 - mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg - orreq r0, r0, #(1 << 21) @ low interrupt latency configuration -#endif - ret lr @ return to head.S:__ret - - /* - * V X F I D LR - * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM - * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 0 110 0011 1.00 .111 1101 < we want - */ - .type v6_crval, #object -v6_crval: - crval clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv6" - string cpu_elf_name, "v6" - .align - - .section ".proc.info.init", #alloc - - /* - * Match any ARMv6 processor core. - */ - .type __v6_proc_info, #object -__v6_proc_info: - .long 0x0007b000 - .long 0x0007f000 - ALT_SMP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_SMP) - ALT_UP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_UP) - .long PMD_TYPE_SECT | \ - PMD_SECT_XN | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __v6_setup, __v6_proc_info - .long cpu_arch_name - .long cpu_elf_name - /* See also feat_v6_fixup() for HWCAP_TLS */ - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS - .long cpu_v6_name - .long v6_processor_functions - .long v6wbi_tlb_fns - .long v6_user_fns - .long v6_cache_fns - .size __v6_proc_info, . 
- __v6_proc_info diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S deleted file mode 100644 index 5db029c8f9876c2b5f0bffa4d6139e9e93c2ed7b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7-2level.S +++ /dev/null @@ -1,162 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mm/proc-v7-2level.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - */ - -#define TTB_S (1 << 1) -#define TTB_RGN_NC (0 << 3) -#define TTB_RGN_OC_WBWA (1 << 3) -#define TTB_RGN_OC_WT (2 << 3) -#define TTB_RGN_OC_WB (3 << 3) -#define TTB_NOS (1 << 5) -#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) -#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) -#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) -#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) - -/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB -#define PMD_FLAGS_UP PMD_SECT_WB - -/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA -#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S - -/* - * cpu_v7_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys - * - * - pgd_phys - physical address of new TTB - * - * It is assumed that: - * - we are not using split page tables - * - * Note that we always need to flush BTAC/BTB if IBE is set - * even on Cortex-A8 revisions not affected by 430973. - * If IBE is not set, the flush BTAC/BTB won't do anything. - */ -ENTRY(cpu_v7_switch_mm) -#ifdef CONFIG_MMU - mmid r1, r1 @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrc p15, 0, r2, c13, c0, 1 @ read current context ID - lsr r2, r2, #8 @ extract the PID - bfi r1, r2, #8, #24 @ insert into new context ID -#endif -#ifdef CONFIG_ARM_ERRATA_754322 - dsb -#endif - mcr p15, 0, r1, c13, c0, 1 @ set context ID - isb - mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 - isb -#endif - bx lr -ENDPROC(cpu_v7_switch_mm) - -/* - * cpu_v7_set_pte_ext(ptep, pte) - * - * Set a level 2 translation table entry. - * - * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at +2048 bytes) - * - pte - PTE value to store - * - ext - value for extended PTE bits - */ -ENTRY(cpu_v7_set_pte_ext) -#ifdef CONFIG_MMU - str r1, [r0] @ linux version - - bic r3, r1, #0x000003f0 - bic r3, r3, #PTE_TYPE_MASK - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - tst r1, #1 << 4 - orrne r3, r3, #PTE_EXT_TEX(1) - - eor r1, r1, #L_PTE_DIRTY - tst r1, #L_PTE_RDONLY | L_PTE_DIRTY - orrne r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - - tst r1, #L_PTE_XN - orrne r3, r3, #PTE_EXT_XN - - tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_VALID - eorne r1, r1, #L_PTE_NONE - tstne r1, #L_PTE_NONE - moveq r3, #0 - - ARM( str r3, [r0, #2048]! 
) - THUMB( add r0, r0, #2048 ) - THUMB( str r3, [r0] ) - ALT_SMP(W(nop)) - ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte -#endif - bx lr -ENDPROC(cpu_v7_set_pte_ext) - - /* - * Memory region attributes with SCTLR.TRE=1 - * - * n = TEX[0],C,B - * TR = PRRR[2n+1:2n] - memory type - * IR = NMRR[2n+1:2n] - inner cacheable property - * OR = NMRR[2n+17:2n+16] - outer cacheable property - * - * n TR IR OR - * UNCACHED 000 00 - * BUFFERABLE 001 10 00 00 - * WRITETHROUGH 010 10 10 10 - * WRITEBACK 011 10 11 11 - * reserved 110 - * WRITEALLOC 111 10 01 01 - * DEV_SHARED 100 01 - * DEV_NONSHARED 100 01 - * DEV_WC 001 10 - * DEV_CACHED 011 10 - * - * Other attributes: - * - * DS0 = PRRR[16] = 0 - device shareable property - * DS1 = PRRR[17] = 1 - device shareable property - * NS0 = PRRR[18] = 0 - normal shareable property - * NS1 = PRRR[19] = 1 - normal shareable property - * NOS = PRRR[24+n] = 1 - not outer shareable - */ -.equ PRRR, 0xff0a81a8 -.equ NMRR, 0x40e040e0 - - /* - * Macro for setting up the TTBRx and TTBCR registers. - * - \ttb0 and \ttb1 updated with the corresponding flags. - */ - .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp - mcr p15, 0, \zero, c2, c0, 2 @ TTB control register - ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) - ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) - mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 - .endm - - /* AT - * TFR EV X F I D LR S - * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM - * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 01 0 110 0011 1100 .111 1101 < we want - */ - .align 2 - .type v7_crval, #object -v7_crval: - crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S deleted file mode 100644 index 131984462d0d5e1f057abab392bf2bca6832400d..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7-3level.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm/mm/proc-v7-3level.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2011 ARM Ltd. - * Author: Catalin Marinas - * based on arch/arm/mm/proc-v7-2level.S - */ -#include - -#define TTB_IRGN_NC (0 << 8) -#define TTB_IRGN_WBWA (1 << 8) -#define TTB_IRGN_WT (2 << 8) -#define TTB_IRGN_WB (3 << 8) -#define TTB_RGN_NC (0 << 10) -#define TTB_RGN_OC_WBWA (1 << 10) -#define TTB_RGN_OC_WT (2 << 10) -#define TTB_RGN_OC_WB (3 << 10) -#define TTB_S (3 << 12) -#define TTB_EAE (1 << 31) - -/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) -#define PMD_FLAGS_UP (PMD_SECT_WB) - -/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) -#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) - -#ifndef __ARMEB__ -# define rpgdl r0 -# define rpgdh r1 -#else -# define rpgdl r1 -# define rpgdh r0 -#endif - -/* - * cpu_v7_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys (physical address of - * the new TTB). 
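The switch_mm body that follows packs the ASID into the 64-bit LPAE TTBR0: "orr rpgdh, rpgdh, r2, lsl #(48 - 32)" lands the 8-bit ASID in bits [55:48] of the register, alongside the table base in the low bits. A one-function C sketch:

```c
#include <stdint.h>

/* LPAE TTBR0 is 64 bits: translation table base in the low word,
 * 8-bit ASID in bits [55:48] - the "lsl #(48 - 32)" on the high word. */
static inline uint64_t lpae_ttbr0(uint64_t pgd_phys, uint8_t asid)
{
    return pgd_phys | ((uint64_t)asid << 48);
}
```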
- */ -ENTRY(cpu_v7_switch_mm) -#ifdef CONFIG_MMU - mmid r2, r2 - asid r2, r2 - orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd - mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 - isb -#endif - ret lr -ENDPROC(cpu_v7_switch_mm) - -#ifdef __ARMEB__ -#define rl r3 -#define rh r2 -#else -#define rl r2 -#define rh r3 -#endif - -/* - * cpu_v7_set_pte_ext(ptep, pte) - * - * Set a level 2 translation table entry. - * - ptep - pointer to level 3 translation table entry - * - pte - PTE value to store (64-bit in r2 and r3) - */ -ENTRY(cpu_v7_set_pte_ext) -#ifdef CONFIG_MMU - tst rl, #L_PTE_VALID - beq 1f - tst rh, #1 << (57 - 32) @ L_PTE_NONE - bicne rl, #L_PTE_VALID - bne 1f - - eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to - @ test for !L_PTE_DIRTY || L_PTE_RDONLY - tst ip, #1 << (55 - 32) | 1 << (58 - 32) - orrne rl, #PTE_AP2 - biceq rl, #PTE_AP2 - -1: strd r2, r3, [r0] - ALT_SMP(W(nop)) - ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte -#endif - ret lr -ENDPROC(cpu_v7_set_pte_ext) - - /* - * Memory region attributes for LPAE (defined in pgtable-3level.h): - * - * n = AttrIndx[2:0] - * - * n MAIR - * UNCACHED 000 00000000 - * BUFFERABLE 001 01000100 - * DEV_WC 001 01000100 - * WRITETHROUGH 010 10101010 - * WRITEBACK 011 11101110 - * DEV_CACHED 011 11101110 - * DEV_SHARED 100 00000100 - * DEV_NONSHARED 100 00000100 - * unused 101 - * unused 110 - * WRITEALLOC 111 11111111 - */ -.equ PRRR, 0xeeaa4400 @ MAIR0 -.equ NMRR, 0xff000004 @ MAIR1 - - /* - * Macro for setting up the TTBRx and TTBCR registers. - * - \ttbr1 updated. - */ - .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp - ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? - mov \tmp, #TTB_EAE @ for TTB control egister - ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) - ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) - ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) - ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) - /* - * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), - * otherwise booting secondary CPUs would end up using TTBR1 for the - * identity mapping set up in TTBR0. - */ - orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ - mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR - mov \tmp, \ttbr1, lsr #20 - mov \ttbr1, \ttbr1, lsl #12 - addls \ttbr1, \ttbr1, #TTBR1_OFFSET - mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 - .endm - - /* - * AT - * TFR EV X F IHD LR S - * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM - * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 11 0 110 0 0011 1100 .111 1101 < we want - */ - .align 2 - .type v7_crval, #object -v7_crval: - crval clear=0x0122c302, mmuset=0x30c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S deleted file mode 100644 index c4e8006a1a8cdd13ae3e622007e7c5b34883393a..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7.S +++ /dev/null @@ -1,826 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v7.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv7 processor support. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "proc-macros.S" - -#ifdef CONFIG_ARM_LPAE -#include "proc-v7-3level.S" -#else -#include "proc-v7-2level.S" -#endif - -ENTRY(cpu_v7_proc_init) - ret lr -ENDPROC(cpu_v7_proc_init) - -ENTRY(cpu_v7_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1000 @ ...i............ - bic r0, r0, #0x0006 @ .............ca. 
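Decoding the two bic masks in cpu_v7_proc_fin above: 0x1000 is the I bit (bit 12, I-cache enable) and 0x0006 covers C (bit 2, D-cache) and A (bit 1, alignment checking), matching the "...i..." and "...ca." dot diagrams in the comments. A sketch of the resulting SCTLR transform:

```c
#include <stdint.h>

#define SCTLR_A (1u << 1)   /* alignment checking */
#define SCTLR_C (1u << 2)   /* D-cache enable     */
#define SCTLR_I (1u << 12)  /* I-cache enable     */

/* cpu_v7_proc_fin leaves the MMU on but turns both caches (and
 * alignment checking) off before control is handed over. */
static inline uint32_t proc_fin_sctlr(uint32_t sctlr)
{
    return sctlr & ~(SCTLR_I | SCTLR_C | SCTLR_A);
}
```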
- mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr -ENDPROC(cpu_v7_proc_fin) - -/* - * cpu_v7_reset(loc, hyp) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - * - hyp - indicate if restart occurs in HYP mode - * - * This code must be executed using a flat identity mapping with - * caches disabled. - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_v7_reset) - mrc p15, 0, r2, c1, c0, 0 @ ctrl register - bic r2, r2, #0x1 @ ...............m - THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) - mcr p15, 0, r2, c1, c0, 0 @ disable MMU - isb -#ifdef CONFIG_ARM_VIRT_EXT - teq r1, #0 - bne __hyp_soft_restart -#endif - bx r0 -ENDPROC(cpu_v7_reset) - .popsection - -/* - * cpu_v7_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. - */ -ENTRY(cpu_v7_do_idle) - dsb @ WFI may enter a low-power mode - wfi - ret lr -ENDPROC(cpu_v7_do_idle) - -ENTRY(cpu_v7_dcache_clean_area) - ALT_SMP(W(nop)) @ MP extensions imply L1 PTW - ALT_UP_B(1f) - ret lr -1: dcache_line_size r2, r3 -2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, r2 - subs r1, r1, r2 - bhi 2b - dsb ishst - ret lr -ENDPROC(cpu_v7_dcache_clean_area) - -#ifdef CONFIG_ARM_PSCI - .arch_extension sec -ENTRY(cpu_v7_smc_switch_mm) - stmfd sp!, {r0 - r3} - movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldmfd sp!, {r0 - r3} - b cpu_v7_switch_mm -ENDPROC(cpu_v7_smc_switch_mm) - .arch_extension virt -ENTRY(cpu_v7_hvc_switch_mm) - stmfd sp!, {r0 - r3} - movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 - movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 - hvc #0 - ldmfd sp!, {r0 - r3} - b cpu_v7_switch_mm -ENDPROC(cpu_v7_hvc_switch_mm) -#endif -ENTRY(cpu_v7_iciallu_switch_mm) - mov r3, #0 - mcr p15, 0, r3, c7, c5, 0 @ ICIALLU - b cpu_v7_switch_mm -ENDPROC(cpu_v7_iciallu_switch_mm) -ENTRY(cpu_v7_bpiall_switch_mm) - mov r3, #0 - mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB - b cpu_v7_switch_mm -ENDPROC(cpu_v7_bpiall_switch_mm) - - string cpu_v7_name, "ARMv7 Processor" - .align - -/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ -.globl cpu_v7_suspend_size -.equ cpu_v7_suspend_size, 4 * 9 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7_do_suspend) - stmfd sp!, {r4 - r11, lr} - mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID - mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID - stmia r0!, {r4 - r5} -#ifdef CONFIG_MMU - mrc p15, 0, r6, c3, c0, 0 @ Domain ID -#ifdef CONFIG_ARM_LPAE - mrrc p15, 1, r5, r7, c2 @ TTB 1 -#else - mrc p15, 0, r7, c2, c0, 1 @ TTB 1 -#endif - mrc p15, 0, r11, c2, c0, 2 @ TTB control register -#endif - mrc p15, 0, r8, c1, c0, 0 @ Control register - mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register - mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control - stmia r0, {r5 - r11} - ldmfd sp!, {r4 - r11, pc} -ENDPROC(cpu_v7_do_suspend) - -ENTRY(cpu_v7_do_resume) - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID - ldmia r0!, {r4 - r5} - mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID - mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID - ldmia r0, {r5 - r11} -#ifdef CONFIG_MMU - mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs - mcr p15, 0, r6, c3, c0, 0 @ Domain ID -#ifdef CONFIG_ARM_LPAE - mcrr p15, 0, r1, ip, c2 @ TTB 0 - mcrr p15, 1, r5, r7, c2 @ TTB 1 -#else - ALT_SMP(orr r1, r1, 
#TTB_FLAGS_SMP) - ALT_UP(orr r1, r1, #TTB_FLAGS_UP) - mcr p15, 0, r1, c2, c0, 0 @ TTB 0 - mcr p15, 0, r7, c2, c0, 1 @ TTB 1 -#endif - mcr p15, 0, r11, c2, c0, 2 @ TTB control register - ldr r4, =PRRR @ PRRR - ldr r5, =NMRR @ NMRR - mcr p15, 0, r4, c10, c2, 0 @ write PRRR - mcr p15, 0, r5, c10, c2, 1 @ write NMRR -#endif /* CONFIG_MMU */ - mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register - teq r4, r9 @ Is it already set? - mcrne p15, 0, r9, c1, c0, 1 @ No, so write it - mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control - isb - dsb - mov r0, r8 @ control register - b cpu_resume_mmu -ENDPROC(cpu_v7_do_resume) -#endif - -.globl cpu_ca9mp_suspend_size -.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_ca9mp_do_suspend) - stmfd sp!, {r4 - r5} - mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register - mrc p15, 0, r5, c15, c0, 0 @ Power register - stmia r0!, {r4 - r5} - ldmfd sp!, {r4 - r5} - b cpu_v7_do_suspend -ENDPROC(cpu_ca9mp_do_suspend) - -ENTRY(cpu_ca9mp_do_resume) - ldmia r0!, {r4 - r5} - mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register - teq r4, r10 @ Already restored? - mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it - mrc p15, 0, r10, c15, c0, 0 @ Read Power register - teq r5, r10 @ Already restored? - mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it - b cpu_v7_do_resume -ENDPROC(cpu_ca9mp_do_resume) -#endif - -#ifdef CONFIG_CPU_PJ4B - globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm - globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init - globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin - globl_equ cpu_pj4b_reset, cpu_v7_reset -#ifdef CONFIG_PJ4B_ERRATA_4742 -ENTRY(cpu_pj4b_do_idle) - dsb @ WFI may enter a low-power mode - wfi - dsb @barrier - ret lr -ENDPROC(cpu_pj4b_do_idle) -#else - globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle -#endif - globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_pj4b_do_suspend) - stmfd sp!, {r6 - r10} - mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features - mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 - mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 - mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 - mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC - stmia r0!, {r6 - r10} - ldmfd sp!, {r6 - r10} - b cpu_v7_do_suspend -ENDPROC(cpu_pj4b_do_suspend) - -ENTRY(cpu_pj4b_do_resume) - ldmia r0!, {r6 - r10} - mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features - mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 - mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2 - mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 - mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC - b cpu_v7_do_resume -ENDPROC(cpu_pj4b_do_resume) -#endif -.globl cpu_pj4b_suspend_size -.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 - -#endif - -/* - * __v7_setup - * - * Initialise TLB, Caches, and MMU state ready to switch the MMU - * on. Return in r0 the new CP15 C1 control register setting. - * - * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack - * r4: TTBR0 (low word) - * r5: TTBR0 (high word if LPAE) - * r8: TTBR1 - * r9: Main ID register - * - * This should be able to cover all ARMv7 cores. 
- * - * It is assumed that: - * - cache type register is implemented - */ -__v7_ca5mp_setup: -__v7_ca9mp_setup: -__v7_cr7mp_setup: -__v7_cr8mp_setup: - mov r10, #(1 << 0) @ Cache/TLB ops broadcasting - b 1f -__v7_ca7mp_setup: -__v7_ca12mp_setup: -__v7_ca15mp_setup: -__v7_b15mp_setup: -__v7_ca17mp_setup: - mov r10, #0 -1: adr r0, __v7_setup_stack_ptr - ldr r12, [r0] - add r12, r12, r0 @ the local stack - stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 - bl v7_invalidate_l1 - ldmia r12, {r1-r6, lr} -#ifdef CONFIG_SMP - orr r10, r10, #(1 << 6) @ Enable SMP/nAMP mode - ALT_SMP(mrc p15, 0, r0, c1, c0, 1) - ALT_UP(mov r0, r10) @ fake it for UP - orr r10, r10, r0 @ Set required bits - teq r10, r0 @ Were they already set? - mcrne p15, 0, r10, c1, c0, 1 @ No, update register -#endif - b __v7_setup_cont - -/* - * Errata: - * r0, r10 available for use - * r1, r2, r4, r5, r9, r13: must be preserved - * r3: contains MIDR rX number in bits 23-20 - * r6: contains MIDR rXpY as 8-bit XY number - * r9: MIDR - */ -__ca8_errata: -#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) - teq r3, #0x00100000 @ only present in r1p* - mrceq p15, 0, r0, c1, c0, 1 @ read aux control register - orreq r0, r0, #(1 << 6) @ set IBE to 1 - mcreq p15, 0, r0, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_458693 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 0, r0, c1, c0, 1 @ read aux control register - orreq r0, r0, #(1 << 5) @ set L1NEON to 1 - orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 - mcreq p15, 0, r0, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_460075 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register - tsteq r0, #1 << 22 - orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit - mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register -#endif - b __errata_finish - -__ca9_errata: -#ifdef CONFIG_ARM_ERRATA_742230 - cmp r6, #0x22 @ only present up to r2p2 - mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register - orrle r0, r0, #1 << 4 @ set bit #4 - mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_742231 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 - mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register - orreq r0, r0, #1 << 12 @ set bit #12 - orreq r0, r0, #1 << 22 @ set bit #22 - mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_743622 - teq r3, #0x00200000 @ only present in r2p* - mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register - orreq r0, r0, #1 << 6 @ set bit #6 - mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register -#endif -#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) - ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 - ALT_UP_B(1f) - mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register - orrlt r0, r0, #1 << 11 @ set bit #11 - mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register -1: -#endif - b __errata_finish - -__ca15_errata: -#ifdef CONFIG_ARM_ERRATA_773022 - cmp r6, #0x4 @ only present up to r0p4 - mrcle p15, 0, r0, c1, c0, 1 @ read aux control register - orrle r0, r0, #1 << 1 @ disable loop buffer - mcrle p15, 0, r0, c1, c0, 1 @ write aux control register -#endif - b __errata_finish - -__ca12_errata: -#ifdef CONFIG_ARM_ERRATA_818325_852422 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #1 << 12 @ set bit #12 - mcr p15, 0, r10, c15, c0, 1 @ write 
diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_821420 - mrc p15, 0, r10, c15, c0, 2 @ read internal feature reg - orr r10, r10, #1 << 1 @ set bit #1 - mcr p15, 0, r10, c15, c0, 2 @ write internal feature reg -#endif -#ifdef CONFIG_ARM_ERRATA_825619 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #1 << 24 @ set bit #24 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_857271 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #3 << 10 @ set bits #10 and #11 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif - b __errata_finish - -__ca17_errata: -#ifdef CONFIG_ARM_ERRATA_852421 - cmp r6, #0x12 @ only present up to r1p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 24 @ set bit #24 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_852423 - cmp r6, #0x12 @ only present up to r1p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 12 @ set bit #12 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_857272 - mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register - orr r10, r10, #3 << 10 @ set bits #10 and #11 - mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif - b __errata_finish - -__v7_pj4b_setup: -#ifdef CONFIG_CPU_PJ4B - -/* Auxiliary Debug Modes Control 1 Register */ -#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ -#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ -#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ - -/* Auxiliary Debug Modes Control 2 Register */ -#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */ -#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */ -#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */ -#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */ -#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */ -#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\ - PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR) - -/* Auxiliary Functional Modes Control Register 0 */ -#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. 
Join the coherency fabric */ -#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */ -#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */ - -/* Auxiliary Debug Modes Control 0 Register */ -#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */ - - /* Auxiliary Debug Modes Control 1 Register */ - mrc p15, 1, r0, c15, c1, 1 - orr r0, r0, #PJ4B_CLEAN_LINE - orr r0, r0, #PJ4B_INTER_PARITY - bic r0, r0, #PJ4B_STATIC_BP - mcr p15, 1, r0, c15, c1, 1 - - /* Auxiliary Debug Modes Control 2 Register */ - mrc p15, 1, r0, c15, c1, 2 - bic r0, r0, #PJ4B_FAST_LDR - orr r0, r0, #PJ4B_AUX_DBG_CTRL2 - mcr p15, 1, r0, c15, c1, 2 - - /* Auxiliary Functional Modes Control Register 0 */ - mrc p15, 1, r0, c15, c2, 0 -#ifdef CONFIG_SMP - orr r0, r0, #PJ4B_SMP_CFB -#endif - orr r0, r0, #PJ4B_L1_PAR_CHK - orr r0, r0, #PJ4B_BROADCAST_CACHE - mcr p15, 1, r0, c15, c2, 0 - - /* Auxiliary Debug Modes Control 0 Register */ - mrc p15, 1, r0, c15, c1, 0 - orr r0, r0, #PJ4B_WFI_WFE - mcr p15, 1, r0, c15, c1, 0 - -#endif /* CONFIG_CPU_PJ4B */ - -__v7_setup: - adr r0, __v7_setup_stack_ptr - ldr r12, [r0] - add r12, r12, r0 @ the local stack - stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6 - bl v7_invalidate_l1 - ldmia r12, {r1-r6, lr} - -__v7_setup_cont: - and r0, r9, #0xff000000 @ ARM? - teq r0, #0x41000000 - bne __errata_finish - and r3, r9, #0x00f00000 @ variant - and r6, r9, #0x0000000f @ revision - orr r6, r6, r3, lsr #20-4 @ combine variant and revision - ubfx r0, r9, #4, #12 @ primary part number - - /* Cortex-A8 Errata */ - ldr r10, =0x00000c08 @ Cortex-A8 primary part number - teq r0, r10 - beq __ca8_errata - - /* Cortex-A9 Errata */ - ldr r10, =0x00000c09 @ Cortex-A9 primary part number - teq r0, r10 - beq __ca9_errata - - /* Cortex-A12 Errata */ - ldr r10, =0x00000c0d @ Cortex-A12 primary part number - teq r0, r10 - beq __ca12_errata - - /* Cortex-A17 Errata */ - ldr r10, =0x00000c0e @ Cortex-A17 primary part number - teq r0, r10 - beq __ca17_errata - - /* Cortex-A15 Errata */ - ldr r10, =0x00000c0f @ Cortex-A15 primary part number - teq r0, r10 - beq __ca15_errata - -__errata_finish: - mov r10, #0 - mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate -#ifdef CONFIG_MMU - mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup - ldr r3, =PRRR @ PRRR - ldr r6, =NMRR @ NMRR - mcr p15, 0, r3, c10, c2, 0 @ write PRRR - mcr p15, 0, r6, c10, c2, 1 @ write NMRR -#endif - dsb @ Complete invalidations -#ifndef CONFIG_ARM_THUMBEE - mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE - and r0, r0, #(0xf << 12) @ ThumbEE enabled field - teq r0, #(1 << 12) @ check if ThumbEE is present - bne 1f - mov r3, #0 - mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 - mrc p14, 6, r0, c0, c0, 0 @ load TEECR - orr r0, r0, #1 @ set the 1st bit in order to - mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access -1: -#endif - adr r3, v7_crval - ldmia r3, {r3, r6} - ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables -#ifdef CONFIG_SWP_EMULATE - orr r3, r3, #(1 << 10) @ set SW bit in "clear" - bic r6, r6, #(1 << 10) @ clear it in "mmuset" -#endif - mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r3 @ clear bits them - orr r0, r0, r6 @ set them - THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions - ret lr @ return to head.S:__ret - - .align 2 -__v7_setup_stack_ptr: - .word PHYS_RELATIVE(__v7_setup_stack, .) 
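
The variant/revision combining in __v7_setup_cont above is what lets the per-core errata paths compare against compact rXpY values such as #0x20 for r2p0 or #0x22 for r2p2. A small C model of the MIDR decode (the sample MIDR value is only illustrative):

#include <stdint.h>
#include <stdio.h>

/* Decode a Main ID Register the way __v7_setup_cont does: implementer
 * in [31:24], variant in [23:20], primary part number in [15:4],
 * revision in [3:0].  The rXpY byte combines variant and revision,
 * e.g. r2p1 -> 0x21, matching the immediates in the errata checks. */
static void decode_midr(uint32_t midr)
{
    uint32_t implementer = midr >> 24;
    uint32_t variant     = (midr >> 20) & 0xf;
    uint32_t part        = (midr >> 4) & 0xfff;
    uint32_t revision    = midr & 0xf;
    uint32_t rXpY        = (variant << 4) | revision;

    printf("impl=%#x part=%#x r%up%u (rXpY=%#x)\n",
           implementer, part, variant, revision, rXpY);
}

int main(void)
{
    decode_midr(0x412fc091); /* illustrative Cortex-A9 r2p1 value */
    return 0;
}
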
-ENDPROC(__v7_setup) - - .bss - .align 2 -__v7_setup_stack: - .space 4 * 7 @ 7 registers - - __INITDATA - - .weak cpu_v7_bugs_init - - @ define struct processor (see and proc-macros.S) - define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - @ generic v7 bpiall on context switch - globl_equ cpu_v7_bpiall_proc_init, cpu_v7_proc_init - globl_equ cpu_v7_bpiall_proc_fin, cpu_v7_proc_fin - globl_equ cpu_v7_bpiall_reset, cpu_v7_reset - globl_equ cpu_v7_bpiall_do_idle, cpu_v7_do_idle - globl_equ cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_v7_bpiall_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_v7_bpiall_suspend_size, cpu_v7_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - globl_equ cpu_v7_bpiall_do_suspend, cpu_v7_do_suspend - globl_equ cpu_v7_bpiall_do_resume, cpu_v7_do_resume -#endif - define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init - -#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions -#else -#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions -#endif - -#ifndef CONFIG_ARM_LPAE - @ Cortex-A8 - always needs bpiall switch_mm implementation - globl_equ cpu_ca8_proc_init, cpu_v7_proc_init - globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca8_reset, cpu_v7_reset - globl_equ cpu_ca8_do_idle, cpu_v7_do_idle - globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area - globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_ca8_switch_mm, cpu_v7_bpiall_switch_mm - globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size -#ifdef CONFIG_ARM_CPU_SUSPEND - globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend - globl_equ cpu_ca8_do_resume, cpu_v7_do_resume -#endif - define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe - - @ Cortex-A9 - needs more registers preserved across suspend/resume - @ and bpiall switch_mm for hardening - globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init - globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca9mp_reset, cpu_v7_reset - globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle - globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - globl_equ cpu_ca9mp_switch_mm, cpu_v7_bpiall_switch_mm -#else - globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm -#endif - globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext - define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init -#endif - - @ Cortex-A15 - needs iciallu switch_mm for hardening - globl_equ cpu_ca15_proc_init, cpu_v7_proc_init - globl_equ cpu_ca15_proc_fin, cpu_v7_proc_fin - globl_equ cpu_ca15_reset, cpu_v7_reset - globl_equ cpu_ca15_do_idle, cpu_v7_do_idle - globl_equ cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - globl_equ cpu_ca15_switch_mm, cpu_v7_iciallu_switch_mm -#else - globl_equ cpu_ca15_switch_mm, cpu_v7_switch_mm -#endif - globl_equ cpu_ca15_set_pte_ext, cpu_v7_set_pte_ext - globl_equ cpu_ca15_suspend_size, cpu_v7_suspend_size - globl_equ cpu_ca15_do_suspend, cpu_v7_do_suspend - globl_equ cpu_ca15_do_resume, cpu_v7_do_resume - define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe -#ifdef CONFIG_CPU_PJ4B - define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 -#endif - - .section ".rodata" - - string 
cpu_arch_name, "armv7" - string cpu_elf_name, "v7" - .align - - .section ".proc.info.init", #alloc - - /* - * Standard v7 proc info content - */ -.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns - ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ - PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) - ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ - PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) - .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags - initfn \initfunc, \name - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ - HWCAP_EDSP | HWCAP_TLS | \hwcaps - .long cpu_v7_name - .long \proc_fns - .long v7wbi_tlb_fns - .long v6_user_fns - .long \cache_fns -.endm - -#ifndef CONFIG_ARM_LPAE - /* - * ARM Ltd. Cortex A5 processor. - */ - .type __v7_ca5mp_proc_info, #object -__v7_ca5mp_proc_info: - .long 0x410fc050 - .long 0xff0ffff0 - __v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup - .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info - - /* - * ARM Ltd. Cortex A9 processor. - */ - .type __v7_ca9mp_proc_info, #object -__v7_ca9mp_proc_info: - .long 0x410fc090 - .long 0xff0ffff0 - __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions - .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info - - /* - * ARM Ltd. Cortex A8 processor. - */ - .type __v7_ca8_proc_info, #object -__v7_ca8_proc_info: - .long 0x410fc080 - .long 0xff0ffff0 - __v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions - .size __v7_ca8_proc_info, . - __v7_ca8_proc_info - -#endif /* CONFIG_ARM_LPAE */ - - /* - * Marvell PJ4B processor. - */ -#ifdef CONFIG_CPU_PJ4B - .type __v7_pj4b_proc_info, #object -__v7_pj4b_proc_info: - .long 0x560f5800 - .long 0xff0fff00 - __v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions - .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info -#endif - - /* - * ARM Ltd. Cortex R7 processor. - */ - .type __v7_cr7mp_proc_info, #object -__v7_cr7mp_proc_info: - .long 0x410fc170 - .long 0xff0ffff0 - __v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup - .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info - - /* - * ARM Ltd. Cortex R8 processor. - */ - .type __v7_cr8mp_proc_info, #object -__v7_cr8mp_proc_info: - .long 0x410fc180 - .long 0xff0ffff0 - __v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup - .size __v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info - - /* - * ARM Ltd. Cortex A7 processor. - */ - .type __v7_ca7mp_proc_info, #object -__v7_ca7mp_proc_info: - .long 0x410fc070 - .long 0xff0ffff0 - __v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup - .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info - - /* - * ARM Ltd. Cortex A12 processor. - */ - .type __v7_ca12mp_proc_info, #object -__v7_ca12mp_proc_info: - .long 0x410fc0d0 - .long 0xff0ffff0 - __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info - - /* - * ARM Ltd. Cortex A15 processor. - */ - .type __v7_ca15mp_proc_info, #object -__v7_ca15mp_proc_info: - .long 0x410fc0f0 - .long 0xff0ffff0 - __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions - .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info - - /* - * Broadcom Corporation Brahma-B15 processor. 
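
Each .proc.info.init entry above opens with a required-ID/mask word pair; at boot the first entry whose masked MIDR equals the required value is selected. The usual 0xff0ffff0 mask zeroes the variant and revision fields, so every stepping of a core matches the same entry, while the final 0x000f0000/0x000f0000 entry catches any core reporting the CPUID scheme. A pared-down C model (the struct is illustrative, not the kernel's proc_info_list layout):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct proc_info {
    uint32_t cpu_val;
    uint32_t cpu_mask;
    const char *name;
};

/* Values mirror entries in this file; order matters, most specific first. */
static const struct proc_info table[] = {
    { 0x410fc090, 0xff0ffff0, "Cortex-A9" },
    { 0x410fc0f0, 0xff0ffff0, "Cortex-A15" },
    { 0x000f0000, 0x000f0000, "generic ARMv7" }, /* catch-all */
};

static const struct proc_info *lookup(uint32_t midr)
{
    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
        if ((midr & table[i].cpu_mask) == table[i].cpu_val)
            return &table[i];
    return NULL;
}

int main(void)
{
    printf("%s\n", lookup(0x412fc091)->name); /* any A9 stepping -> Cortex-A9 */
    return 0;
}
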
- */ - .type __v7_b15mp_proc_info, #object -__v7_b15mp_proc_info: - .long 0x420f00f0 - .long 0xff0ffff0 - __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns - .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info - - /* - * ARM Ltd. Cortex A17 processor. - */ - .type __v7_ca17mp_proc_info, #object -__v7_ca17mp_proc_info: - .long 0x410fc0e0 - .long 0xff0ffff0 - __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info - - /* ARM Ltd. Cortex A73 processor */ - .type __v7_ca73_proc_info, #object -__v7_ca73_proc_info: - .long 0x410fd090 - .long 0xff0ffff0 - __v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca73_proc_info, . - __v7_ca73_proc_info - - /* ARM Ltd. Cortex A75 processor */ - .type __v7_ca75_proc_info, #object -__v7_ca75_proc_info: - .long 0x410fd0a0 - .long 0xff0ffff0 - __v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS - .size __v7_ca75_proc_info, . - __v7_ca75_proc_info - - /* - * Qualcomm Inc. Krait processors. - */ - .type __krait_proc_info, #object -__krait_proc_info: - .long 0x510f0400 @ Required ID value - .long 0xff0ffc00 @ Mask for ID - /* - * Some Krait processors don't indicate support for SDIV and UDIV - * instructions in the ARM instruction set, even though they actually - * do support them. They also don't indicate support for fused multiply - * instructions even though they actually do support them. - */ - __v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 - .size __krait_proc_info, . - __krait_proc_info - - /* - * Match any ARMv7 processor core. - */ - .type __v7_proc_info, #object -__v7_proc_info: - .long 0x000f0000 @ Required ID value - .long 0x000f0000 @ Mask for ID - __v7_proc __v7_proc_info, __v7_setup - .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S deleted file mode 100644 index 1a49d503eafc80b461d256f4f068e9a54c6d85f6..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-v7m.S +++ /dev/null @@ -1,235 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-v7m.S - * - * Copyright (C) 2008 ARM Ltd. - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * - * This is the "shell" of the ARMv7-M processor support. - */ -#include -#include -#include -#include -#include "proc-macros.S" - -ENTRY(cpu_v7m_proc_init) - ret lr -ENDPROC(cpu_v7m_proc_init) - -ENTRY(cpu_v7m_proc_fin) - ret lr -ENDPROC(cpu_v7m_proc_fin) - -/* - * cpu_v7m_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * - loc - location to jump to for soft reset - */ - .align 5 -ENTRY(cpu_v7m_reset) - ret r0 -ENDPROC(cpu_v7m_reset) - -/* - * cpu_v7m_do_idle() - * - * Idle the processor (eg, wait for interrupt). - * - * IRQs are already disabled. - */ -ENTRY(cpu_v7m_do_idle) - wfi - ret lr -ENDPROC(cpu_v7m_do_idle) - -ENTRY(cpu_v7m_dcache_clean_area) - ret lr -ENDPROC(cpu_v7m_dcache_clean_area) - -/* - * There is no MMU, so here is nothing to do. 
- */ -ENTRY(cpu_v7m_switch_mm) - ret lr -ENDPROC(cpu_v7m_switch_mm) - -.globl cpu_v7m_suspend_size -.equ cpu_v7m_suspend_size, 0 - -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_v7m_do_suspend) - ret lr -ENDPROC(cpu_v7m_do_suspend) - -ENTRY(cpu_v7m_do_resume) - ret lr -ENDPROC(cpu_v7m_do_resume) -#endif - -ENTRY(cpu_cm7_dcache_clean_area) - dcache_line_size r2, r3 - movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC - movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC - -1: str r0, [r3] @ clean D entry - add r0, r0, r2 - subs r1, r1, r2 - bhi 1b - dsb - ret lr -ENDPROC(cpu_cm7_dcache_clean_area) - -ENTRY(cpu_cm7_proc_fin) - movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) - movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) - ldr r0, [r2] - bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC) - str r0, [r2] - ret lr -ENDPROC(cpu_cm7_proc_fin) - - .section ".init.text", #alloc, #execinstr - -__v7m_cm7_setup: - mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) - b __v7m_setup_cont -/* - * __v7m_setup - * - * This should be able to cover all ARMv7-M cores. - */ -__v7m_setup: - mov r8, 0 - -__v7m_setup_cont: - @ Configure the vector table base address - ldr r0, =BASEADDR_V7M_SCB - ldr r12, =vector_table - str r12, [r0, V7M_SCB_VTOR] - - @ enable UsageFault, BusFault and MemManage fault. - ldr r5, [r0, #V7M_SCB_SHCSR] - orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) - str r5, [r0, #V7M_SCB_SHCSR] - - @ Lower the priority of the SVC and PendSV exceptions - mov r5, #0x80000000 - str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority - mov r5, #0x00800000 - str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority - - @ SVC to switch to handler mode. Notice that this requires sp to - @ point to writeable memory because the processor saves - @ some registers to the stack. - badr r1, 1f - ldr r5, [r12, #11 * 4] @ read the SVC vector entry - str r1, [r12, #11 * 4] @ write the temporary SVC vector entry - dsb - mov r6, lr @ save LR - ldr sp, =init_thread_union + THREAD_START_SP - cpsie i - svc #0 -1: cpsid i - /* Calculate exc_ret */ - orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK - ldmia sp, {r0-r3, r12} - str r5, [r12, #11 * 4] @ restore the original SVC vector entry - mov lr, r6 @ restore LR - - @ Special-purpose control register - mov r1, #1 - msr control, r1 @ Thread mode has unpriviledged access - - @ Configure caches (if implemented) - teq r8, #0 - stmiane sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 - blne v7m_invalidate_l1 - teq r8, #0 @ re-evalutae condition - ldmiane sp, {r0-r6, lr} - - @ Configure the System Control Register to ensure 8-byte stack alignment - @ Note the STKALIGN bit is either RW or RAO. 
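
Unlike the CP15-based v7-A routines elsewhere in this patch, cpu_cm7_dcache_clean_area above drives the cache through memory-mapped System Control Block registers, since v7-M has no coprocessor interface. A hedged C equivalent of its shape (the DCCMVAC address and line size are assumptions from the Cortex-M7 SCB layout, not values taken from this file, and it only runs on an ARM target):

#include <stddef.h>
#include <stdint.h>

#define SCB_DCCMVAC  (*(volatile uint32_t *)0xe000ef68u) /* assumed address */
#define DCACHE_LINE  32u                                 /* assumed line size */

/* Clean [addr, addr + size) by writing each line-aligned address to
 * DCCMVAC ("clean D-cache line by MVA"), then order completion with a
 * DSB, mirroring the loop in cpu_cm7_dcache_clean_area. */
static void cm7_dcache_clean_area(uintptr_t addr, size_t size)
{
    uintptr_t end = addr + size;

    for (addr &= ~(uintptr_t)(DCACHE_LINE - 1); addr < end;
         addr += DCACHE_LINE)
        SCB_DCCMVAC = (uint32_t)addr;
    __asm__ volatile("dsb" ::: "memory");
}
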
- ldr r0, [r0, V7M_SCB_CCR] @ system control register - orr r0, #V7M_SCB_CCR_STKALIGN - orr r0, r0, r8 - - ret lr -ENDPROC(__v7m_setup) - -/* - * Cortex-M7 processor functions - */ - globl_equ cpu_cm7_proc_init, cpu_v7m_proc_init - globl_equ cpu_cm7_reset, cpu_v7m_reset - globl_equ cpu_cm7_do_idle, cpu_v7m_do_idle - globl_equ cpu_cm7_switch_mm, cpu_v7m_switch_mm - - define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 - - .section ".rodata" - string cpu_arch_name, "armv7m" - string cpu_elf_name "v7m" - string cpu_v7m_name "ARMv7-M" - - .section ".proc.info.init", #alloc - -.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions - .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ - .long 0 /* proc_info_list.__cpu_io_mmu_flags */ - initfn \initfunc, \name - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps - .long cpu_v7m_name - .long \proc_fns - .long 0 /* proc_info_list.tlb */ - .long 0 /* proc_info_list.user */ - .long \cache_fns -.endm - - /* - * Match ARM Cortex-M7 processor. - */ - .type __v7m_cm7_proc_info, #object -__v7m_cm7_proc_info: - .long 0x410fc270 /* ARM Cortex-M7 0xC27 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions - .size __v7m_cm7_proc_info, . - __v7m_cm7_proc_info - - /* - * Match ARM Cortex-M4 processor. - */ - .type __v7m_cm4_proc_info, #object -__v7m_cm4_proc_info: - .long 0x410fc240 /* ARM Cortex-M4 0xC24 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP - .size __v7m_cm4_proc_info, . - __v7m_cm4_proc_info - - /* - * Match ARM Cortex-M3 processor. - */ - .type __v7m_cm3_proc_info, #object -__v7m_cm3_proc_info: - .long 0x410fc230 /* ARM Cortex-M3 0xC23 */ - .long 0xff0ffff0 /* Mask off revision, patch release */ - __v7m_proc __v7m_cm3_proc_info, __v7m_setup - .size __v7m_cm3_proc_info, . - __v7m_cm3_proc_info - - /* - * Match any ARMv7-M processor core. - */ - .type __v7m_proc_info, #object -__v7m_proc_info: - .long 0x000f0000 @ Required ID value - .long 0x000f0000 @ Mask for ID - __v7m_proc __v7m_proc_info, __v7m_setup - .size __v7m_proc_info, . - __v7m_proc_info - diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S deleted file mode 100644 index 1ac0fbbe9f127f3524eee28503a9ded19790cc98..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-xsc3.S +++ /dev/null @@ -1,529 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-xsc3.S - * - * Original Author: Matthew Gilbert - * Current Maintainer: Lennert Buytenhek - * - * Copyright 2004 (C) Intel Corp. - * Copyright 2005 (C) MontaVista Software, Inc. - * - * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is - * an extension to Intel's original XScale core that adds the following - * features: - * - * - ARMv6 Supersections - * - Low Locality Reference pages (replaces mini-cache) - * - 36-bit addressing - * - L2 cache - * - Cache coherency if chipset supports it - * - * Based on original XScale code by Nicolas Pitre. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. 
If the - * area is larger than this, then we flush the whole cache. - */ -#define MAX_AREA_SIZE 32768 - -/* - * The cache line size of the L1 I, L1 D and unified L2 cache. - */ -#define CACHELINESIZE 32 - -/* - * The size of the L1 D cache. - */ -#define CACHESIZE 32768 - -/* - * This macro is used to wait for a CP15 write and is needed when we - * have to ensure that the last operation to the coprocessor was - * completed before continuing with operation. - */ - .macro cpwait_ret, lr, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - sub pc, \lr, \rd, LSR #32 @ wait for completion and - @ flush instruction pipeline - .endm - -/* - * This macro cleans and invalidates the entire L1 D cache. - */ - - .macro clean_d_cache rd, rs - mov \rd, #0x1f00 - orr \rd, \rd, #0x00e0 -1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line - adds \rd, \rd, #0x40000000 - bcc 1b - subs \rd, \rd, #0x20 - bpl 1b - .endm - - .text - -/* - * cpu_xsc3_proc_init() - * - * Nothing too exciting at the moment - */ -ENTRY(cpu_xsc3_proc_init) - ret lr - -/* - * cpu_xsc3_proc_fin() - */ -ENTRY(cpu_xsc3_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...IZ........... - bic r0, r0, #0x0006 @ .............CA. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_xsc3_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_xsc3_reset) - mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r1 @ reset CPSR - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x3900 @ ..VIZ..S........ - bic r1, r1, #0x0086 @ ........B....CA. - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB - bic r1, r1, #0x0001 @ ...............M - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - @ CAUTION: MMU turned off from this point. We count on the pipeline - @ already containing those two last instructions to survive. - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - ret r0 -ENDPROC(cpu_xsc3_reset) - .popsection - -/* - * cpu_xsc3_do_idle() - * - * Cause the processor to idle - * - * For now we do nothing but go to idle mode for every case - * - * XScale supports clock switching, but using idle mode support - * allows external hardware to react to system state changes. - */ - .align 5 - -ENTRY(cpu_xsc3_do_idle) - mov r0, #1 - mcr p14, 0, r0, c7, c0, 0 @ go to idle - ret lr - -/* ================================= CACHE ================================ */ - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(xsc3_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(xsc3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - */ -ENTRY(xsc3_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. 
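
xsc3_flush_user_cache_range, just below, applies the MAX_AREA_SIZE rule described above: a range of 32 KiB or more falls back to a whole-cache clean, since walking it line by line would cost more than cleaning everything. The decision reduced to C (cache primitives stubbed out; a sketch, not kernel code):

#include <stdint.h>

#define MAX_AREA_SIZE 32768u
#define CACHELINESIZE 32u

static void clean_inv_line(uintptr_t a) { (void)a; } /* c7, c14, 1 */
static void flush_whole_cache(void)     { }

static void flush_cache_range(uintptr_t start, uintptr_t end)
{
    if (end - start >= MAX_AREA_SIZE) { /* bhs: unsigned >= */
        flush_whole_cache();
        return;
    }
    for (; start < end; start += CACHELINESIZE)
        clean_inv_line(start);
}

int main(void)
{
    flush_cache_range(0x8000, 0x9000); /* small range: per-line path */
    return 0;
}
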
- */ -ENTRY(xsc3_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - clean_d_cache r0, r1 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_user_cache_range(start, end, vm_flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_area_struct describing address space - */ - .align 5 -ENTRY(xsc3_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #MAX_AREA_SIZE - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line - mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ data write barrier - mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the I cache and the D cache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * - * Note: single I-cache line invalidation isn't used here since - * it also trashes the mini I-cache used by JTAG debuggers. - */ -ENTRY(xsc3_coherent_kern_range) -/* FALLTHROUGH */ -ENTRY(xsc3_coherent_user_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache. - * - * - addr - kernel address - * - size - region size - */ -ENTRY(xsc3_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -xsc3_dma_inv_range: - tst r0, #CACHELINESIZE - 1 - bic r0, r0, #CACHELINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line - tst r1, #CACHELINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -xsc3_dma_clean_range: - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xsc3_dma_flush_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ data write barrier - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xsc3_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq xsc3_dma_clean_range - bcs xsc3_dma_inv_range - b xsc3_dma_flush_range -ENDPROC(xsc3_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xsc3_dma_unmap_area) - ret lr -ENDPROC(xsc3_dma_unmap_area) - - .globl xsc3_flush_kern_cache_louis - .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xsc3 - -ENTRY(cpu_xsc3_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line - add r0, r0, #CACHELINESIZE - subs r1, r1, #CACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_xsc3_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_xsc3_switch_mm) - clean_d_cache r1, r2 - mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - orr r0, r0, #0x18 @ cache the page table in L2 - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - cpwait_ret lr, ip - -/* - * cpu_xsc3_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - */ -cpu_xsc3_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE - .long PTE_EXT_TEX(5) | PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long 0x00 @ L_PTE_MT_MINICACHE (not present) - .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) - .long 0x00 @ unused - .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long 0x00 @ unused - - .align 5 -ENTRY(cpu_xsc3_set_pte_ext) - xscale_set_pte_ext_prologue - - tst r1, #L_PTE_SHARED @ shared? 
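
xsc3_dma_map_area above turns the DMA direction into one of the three range operations, and xsc3_dma_inv_range (earlier) first cleans any line that only partly overlaps the buffer so the bytes outside it are not discarded. A C model of both (direction values as in the kernel's dma_data_direction; the line operations are stubs):

#include <stddef.h>
#include <stdint.h>

#define CACHELINESIZE 32u

enum dma_data_direction {              /* values as in the kernel */
    DMA_BIDIRECTIONAL = 0,
    DMA_TO_DEVICE     = 1,
    DMA_FROM_DEVICE   = 2,
};

static void clean_line(uintptr_t a)     { (void)a; } /* c7, c10, 1 */
static void clean_inv_line(uintptr_t a) { (void)a; } /* c7, c14, 1 */
static void inv_line(uintptr_t a)       { (void)a; } /* c7, c6, 1 */

/* Partially covered lines at either edge are cleaned first; the loop
 * then invalidates them, which nets out to a flush of just those lines. */
static void dma_inv_range(uintptr_t start, uintptr_t end)
{
    if (start & (CACHELINESIZE - 1)) {
        start &= ~(uintptr_t)(CACHELINESIZE - 1);
        clean_line(start);
    }
    if (end & (CACHELINESIZE - 1))
        clean_line(end & ~(uintptr_t)(CACHELINESIZE - 1));
    for (; start < end; start += CACHELINESIZE)
        inv_line(start);
}

static void dma_clean_range(uintptr_t start, uintptr_t end)
{
    for (start &= ~(uintptr_t)(CACHELINESIZE - 1); start < end;
         start += CACHELINESIZE)
        clean_line(start);
}

static void dma_flush_range(uintptr_t start, uintptr_t end)
{
    for (start &= ~(uintptr_t)(CACHELINESIZE - 1); start < end;
         start += CACHELINESIZE)
        clean_inv_line(start);
}

/* The three-way branch in xsc3_dma_map_area (beq / bcs / fallthrough). */
static void dma_map_area(uintptr_t start, size_t size,
                         enum dma_data_direction dir)
{
    uintptr_t end = start + size;

    if (dir == DMA_TO_DEVICE)
        dma_clean_range(start, end);
    else if (dir > DMA_TO_DEVICE)      /* DMA_FROM_DEVICE */
        dma_inv_range(start, end);
    else                               /* DMA_BIDIRECTIONAL */
        dma_flush_range(start, end);
}

int main(void)
{
    dma_map_area(0x1008, 64, DMA_FROM_DEVICE); /* unaligned start */
    return 0;
}
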
- and r1, r1, #L_PTE_MT_MASK - adr ip, cpu_xsc3_mt_table - ldr ip, [ip, r1] - orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit - bic r2, r2, #0x0c @ clear old C,B bits - orr r2, r2, ip - - xscale_set_pte_ext_epilogue - ret lr - - .ltorg - .align - -.globl cpu_xsc3_suspend_size -.equ cpu_xsc3_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xsc3_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmia sp!, {r4 - r9, pc} -ENDPROC(cpu_xsc3_do_suspend) - -ENTRY(cpu_xsc3_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer - mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. - mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - orr r1, r1, #0x18 @ cache the page table in L2 - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_xsc3_do_resume) -#endif - - .type __xsc3_setup, #function -__xsc3_setup: - mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r0 - mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB - mcr p15, 0, ip, c7, c10, 4 @ data write barrier - mcr p15, 0, ip, c7, c5, 4 @ prefetch flush - mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - orr r4, r4, #0x18 @ cache the page table in L2 - mcr p15, 0, r4, c2, c0, 0 @ load page table pointer - - mov r0, #1 << 6 @ cp6 access for early sched_clock - mcr p15, 0, r0, c15, c1, 0 @ write CP access register - - mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg - and r0, r0, #2 @ preserve bit P bit setting - orr r0, r0, #(1 << 10) @ enable L2 for LLR cache - mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg - - adr r5, xsc3_crval - ldmia r5, {r5, r6} - -#ifdef CONFIG_CACHE_XSC3L2 - mrc p15, 1, r0, c0, c0, 1 @ get L2 present information - ands r0, r0, #0xf8 - orrne r6, r6, #(1 << 26) @ enable L2 if present -#endif - - mrc p15, 0, r0, c1, c0, 0 @ get control register - bic r0, r0, r5 @ ..V. ..R. .... ..A. - orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) - @ ...I Z..S .... .... (uc) - ret lr - - .size __xsc3_setup, . 
- __xsc3_setup - - .type xsc3_crval, #object -xsc3_crval: - crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - string cpu_xsc3_name, "XScale-V3 based processor" - - .align - - .section ".proc.info.init", #alloc - -.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __xsc3_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_xsc3_name - .long xsc3_processor_functions - .long v4wbi_tlb_fns - .long xsc3_mc_user_fns - .long xsc3_cache_fns - .size __\name\()_proc_info, . - __\name\()_proc_info -.endm - - xsc3_proc_info xsc3, 0x69056000, 0xffffe000 - -/* Note: PXA935 changed its implementor ID from Intel to Marvell */ - xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S deleted file mode 100644 index bdb2b7749b0393dec09fc39236c32d72cdfa1f06..0000000000000000000000000000000000000000 --- a/arch/arm/mm/proc-xscale.S +++ /dev/null @@ -1,658 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/proc-xscale.S - * - * Author: Nicolas Pitre - * Created: November 2000 - * Copyright: (C) 2000, 2001 MontaVista Software Inc. - * - * MMU functions for the Intel XScale CPUs - * - * 2001 Aug 21: - * some contributions by Brett Gaines - * Copyright 2001 by Intel Corp. - * - * 2001 Sep 08: - * Completely revisited, many important fixes - * Nicolas Pitre - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * This is the maximum size of an area which will be flushed. If the area - * is larger than this, then we flush the whole cache - */ -#define MAX_AREA_SIZE 32768 - -/* - * the cache line size of the I and D cache - */ -#define CACHELINESIZE 32 - -/* - * the size of the data cache - */ -#define CACHESIZE 32768 - -/* - * Virtual address used to allocate the cache when flushed - * - * This must be an address range which is _never_ used. It should - * apparently have a mapping in the corresponding page table for - * compatibility with future CPUs that _could_ require it. For instance we - * don't care. - * - * This must be aligned on a 2*CACHESIZE boundary. The code selects one of - * the 2 areas in alternance each time the clean_d_cache macro is used. - * Without this the XScale core exhibits cache eviction problems and no one - * knows why. - * - * Reminder: the vector table is located at 0xffff0000-0xffff0fff. - */ -#define CLEAN_ADDR 0xfffe0000 - -/* - * This macro is used to wait for a CP15 write and is needed - * when we have to ensure that the last operation to the co-pro - * was completed before continuing with operation. 
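
The crval triple used by xsc3_crval above (and by the other proc-*.S files in this patch) encodes the control-register update done at the end of __xsc3_setup: new = (old & ~clear) | set, with mmuset for the MMU-on case and ucset for the uncached one. A standalone check using the xsc3_crval constants:

#include <assert.h>
#include <stdint.h>

#define XSC3_CLEAR  0x04002202u
#define XSC3_MMUSET 0x00003905u

/* The bic/orr pair at the end of __xsc3_setup, as one expression. */
static uint32_t apply_crval(uint32_t ctrl, uint32_t clear, uint32_t set)
{
    return (ctrl & ~clear) | set;
}

int main(void)
{
    /* Whatever the register held before, every "set" bit ends up set
     * and every "clear" bit not re-set by mmuset ends up clear. */
    uint32_t v = apply_crval(0xffffffffu, XSC3_CLEAR, XSC3_MMUSET);

    assert((v & XSC3_MMUSET) == XSC3_MMUSET);
    assert((v & (XSC3_CLEAR & ~XSC3_MMUSET)) == 0);
    return 0;
}
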
- */ - .macro cpwait, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - mov \rd, \rd @ wait for completion - sub pc, pc, #4 @ flush instruction pipeline - .endm - - .macro cpwait_ret, lr, rd - mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 - sub pc, \lr, \rd, LSR #32 @ wait for completion and - @ flush instruction pipeline - .endm - -/* - * This macro cleans the entire dcache using line allocate. - * The main loop has been unrolled to reduce loop overhead. - * rd and rs are two scratch registers. - */ - .macro clean_d_cache, rd, rs - ldr \rs, =clean_addr - ldr \rd, [\rs] - eor \rd, \rd, #CACHESIZE - str \rd, [\rs] - add \rs, \rd, #CACHESIZE -1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line - add \rd, \rd, #CACHELINESIZE - teq \rd, \rs - bne 1b - .endm - - .data - .align 2 -clean_addr: .word CLEAN_ADDR - - .text - -/* - * cpu_xscale_proc_init() - * - * Nothing too exciting at the moment - */ -ENTRY(cpu_xscale_proc_init) - @ enable write buffer coalescing. Some bootloader disable it - mrc p15, 0, r1, c1, c0, 1 - bic r1, r1, #1 - mcr p15, 0, r1, c1, c0, 1 - ret lr - -/* - * cpu_xscale_proc_fin() - */ -ENTRY(cpu_xscale_proc_fin) - mrc p15, 0, r0, c1, c0, 0 @ ctrl register - bic r0, r0, #0x1800 @ ...IZ........... - bic r0, r0, #0x0006 @ .............CA. - mcr p15, 0, r0, c1, c0, 0 @ disable caches - ret lr - -/* - * cpu_xscale_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the - * same state as it would be if it had been reset, and branch - * to what would be the reset vector. - * - * loc: location to jump to for soft reset - * - * Beware PXA270 erratum E7. - */ - .align 5 - .pushsection .idmap.text, "ax" -ENTRY(cpu_xscale_reset) - mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r1 @ reset CPSR - mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB - mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x0086 @ ........B....CA. - bic r1, r1, #0x3900 @ ..VIZ..S........ - sub pc, pc, #4 @ flush pipeline - @ *** cache line aligned *** - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x0001 @ ...............M - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB - mcr p15, 0, r1, c1, c0, 0 @ ctrl register - @ CAUTION: MMU turned off from this point. We count on the pipeline - @ already containing those two last instructions to survive. - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - ret r0 -ENDPROC(cpu_xscale_reset) - .popsection - -/* - * cpu_xscale_do_idle() - * - * Cause the processor to idle - * - * For now we do nothing but go to idle mode for every case - * - * XScale supports clock switching, but using idle mode support - * allows external hardware to react to system state changes. - */ - .align 5 - -ENTRY(cpu_xscale_do_idle) - mov r0, #1 - mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE - ret lr - -/* ================================= CACHE ================================ */ - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(xscale_flush_icache_all) - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - ret lr -ENDPROC(xscale_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. 
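
The clean_d_cache macro above implements the CLEAN_ADDR scheme from the file header: rather than iterating by set/way, it line-allocates one full cache's worth of lines from a dedicated unused virtual window, which evicts (and therefore cleans) every dirty line, and it alternates between the two halves of the 2*CACHESIZE region on each call. The same logic in C (alloc_line stands in for the c7, c2, 5 line-allocate op):

#include <stdint.h>

#define CACHESIZE     32768u
#define CACHELINESIZE 32u
#define CLEAN_ADDR    0xfffe0000u /* dedicated, otherwise-unused window */

static uintptr_t clean_addr = CLEAN_ADDR;

static void alloc_line(uintptr_t addr) { (void)addr; }

static void clean_d_cache(void)
{
    uintptr_t addr, end;

    clean_addr ^= CACHESIZE;      /* alternate between the two areas */
    addr = clean_addr;
    end  = addr + CACHESIZE;
    while (addr != end) {
        alloc_line(addr);         /* allocating evicts a (dirty) line */
        addr += CACHELINESIZE;
    }
}

int main(void)
{
    clean_d_cache();              /* uses 0xfffe8000 */
    clean_d_cache();              /* uses 0xfffe0000 */
    return 0;
}
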
- */ -ENTRY(xscale_flush_user_cache_all) - /* FALLTHROUGH */ - -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(xscale_flush_kern_cache_all) - mov r2, #VM_EXEC - mov ip, #0 -__flush_whole_cache: - clean_d_cache r0, r1 - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * flush_user_cache_range(start, end, vm_flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_area_struct describing address space - */ - .align 5 -ENTRY(xscale_flush_user_cache_range) - mov ip, #0 - sub r3, r1, r0 @ calculate total size - cmp r3, #MAX_AREA_SIZE - bhs __flush_whole_cache - -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line - mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line - mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - tst r2, #VM_EXEC - mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB - mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - * - * Note: single I-cache line invalidation isn't used here since - * it also trashes the mini I-cache used by JTAG debuggers. - */ -ENTRY(xscale_coherent_kern_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xscale_coherent_user_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * flush_kern_dcache_area(void *addr, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(xscale_flush_kern_dcache_area) - add r1, r0, r1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. 
- * - * - start - virtual start address - * - end - virtual end address - */ -xscale_dma_inv_range: - tst r0, #CACHELINESIZE - 1 - bic r0, r0, #CACHELINESIZE - 1 - mcrne p15, 0, r0, c7, c10, 1 @ clean D entry - tst r1, #CACHELINESIZE - 1 - mcrne p15, 0, r1, c7, c10, 1 @ clean D entry -1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -xscale_dma_clean_range: - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(xscale_dma_flush_range) - bic r0, r0, #CACHELINESIZE - 1 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry - add r0, r0, #CACHELINESIZE - cmp r0, r1 - blo 1b - mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - ret lr - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_dma_map_area) - add r1, r1, r0 - cmp r2, #DMA_TO_DEVICE - beq xscale_dma_clean_range - bcs xscale_dma_inv_range - b xscale_dma_flush_range -ENDPROC(xscale_dma_map_area) - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_80200_A0_A1_dma_map_area) - add r1, r1, r0 - teq r2, #DMA_TO_DEVICE - beq xscale_dma_clean_range - b xscale_dma_flush_range -ENDPROC(xscale_80200_A0_A1_dma_map_area) - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(xscale_dma_unmap_area) - ret lr -ENDPROC(xscale_dma_unmap_area) - - .globl xscale_flush_kern_cache_louis - .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xscale - -/* - * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't - * clear the dirty bits, which means that if we invalidate a dirty line, - * the dirty data can still be written back to external memory later on. - * - * The recommended workaround is to always do a clean D-cache line before - * doing an invalidate D-cache line, so on the affected processors, - * dma_inv_range() is implemented as dma_flush_range(). - * - * See erratum #25 of "Intel 80200 Processor Specification Update", - * revision January 22, 2003, available at: - * http://www.intel.com/design/iio/specupdt/273415.htm - */ -.macro a0_alias basename - .globl xscale_80200_A0_A1_\basename - .type xscale_80200_A0_A1_\basename , %function - .equ xscale_80200_A0_A1_\basename , xscale_\basename -.endm - -/* - * Most of the cache functions are unchanged for these processor revisions. 
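
The a0_alias list that follows builds a second cpu_cache_fns for 80200 A0/A1 in which only the DMA mapping path differs: because per-line invalidate cannot be trusted on those steppings (erratum #25 above), the invalidate role is effectively served by the clean-and-invalidate routine. An illustrative C rendering of the idea (the struct is a stand-in, not the kernel's cpu_cache_fns layout):

#include <stdint.h>

struct dma_cache_ops {
    void (*inv_range)(uintptr_t, uintptr_t);
    void (*clean_range)(uintptr_t, uintptr_t);
    void (*flush_range)(uintptr_t, uintptr_t);
};

static void xscale_inv(uintptr_t s, uintptr_t e)   { (void)s; (void)e; }
static void xscale_clean(uintptr_t s, uintptr_t e) { (void)s; (void)e; }
static void xscale_flush(uintptr_t s, uintptr_t e) { (void)s; (void)e; }

/* Normal XScale cores get the real invalidate. */
static const struct dma_cache_ops xscale_ops = {
    .inv_range   = xscale_inv,
    .clean_range = xscale_clean,
    .flush_range = xscale_flush,
};

/* 80200 A0/A1: invalidating a dirty line does not clear its dirty bit,
 * so "invalidate" must clean first, i.e. behave as a flush. */
static const struct dma_cache_ops xscale_80200_a0_a1_ops = {
    .inv_range   = xscale_flush,
    .clean_range = xscale_clean,
    .flush_range = xscale_flush,
};

int main(void)
{
    xscale_80200_a0_a1_ops.inv_range(0x1000, 0x2000); /* really a flush */
    (void)xscale_ops;
    return 0;
}
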
- * Export suitable alias symbols for the unchanged functions: - */ - a0_alias flush_icache_all - a0_alias flush_user_cache_all - a0_alias flush_kern_cache_all - a0_alias flush_kern_cache_louis - a0_alias flush_user_cache_range - a0_alias coherent_kern_range - a0_alias coherent_user_range - a0_alias flush_kern_dcache_area - a0_alias dma_flush_range - a0_alias dma_unmap_area - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions xscale_80200_A0_A1 - -ENTRY(cpu_xscale_dcache_clean_area) -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry - add r0, r0, #CACHELINESIZE - subs r1, r1, #CACHELINESIZE - bhi 1b - ret lr - -/* =============================== PageTable ============================== */ - -/* - * cpu_xscale_switch_mm(pgd) - * - * Set the translation base pointer to be as described by pgd. - * - * pgd: new page tables - */ - .align 5 -ENTRY(cpu_xscale_switch_mm) - clean_d_cache r1, r2 - mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - cpwait_ret lr, ip - -/* - * cpu_xscale_set_pte_ext(ptep, pte, ext) - * - * Set a PTE and flush it out - * - * Errata 40: must set memory to write-through for user read-only pages. - */ -cpu_xscale_mt_table: - .long 0x00 @ L_PTE_MT_UNCACHED - .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE - .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK - .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED - .long 0x00 @ unused - .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE - .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC - .long 0x00 @ unused - .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC - .long 0x00 @ unused - .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED - .long 0x00 @ L_PTE_MT_DEV_NONSHARED - .long 0x00 @ unused - .long 0x00 @ unused - .long 0x00 @ unused - - .align 5 -ENTRY(cpu_xscale_set_pte_ext) - xscale_set_pte_ext_prologue - - @ - @ Erratum 40: must set memory to write-through for user read-only pages - @ - and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) - teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY - - moveq r1, #L_PTE_MT_WRITETHROUGH - and r1, r1, #L_PTE_MT_MASK - adr ip, cpu_xscale_mt_table - ldr ip, [ip, r1] - bic r2, r2, #0x0c - orr r2, r2, ip - - xscale_set_pte_ext_epilogue - ret lr - - .ltorg - .align - -.globl cpu_xscale_suspend_size -.equ cpu_xscale_suspend_size, 4 * 6 -#ifdef CONFIG_ARM_CPU_SUSPEND -ENTRY(cpu_xscale_do_suspend) - stmfd sp!, {r4 - r9, lr} - mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode - mrc p15, 0, r5, c15, c1, 0 @ CP access reg - mrc p15, 0, r6, c13, c0, 0 @ PID - mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mrc p15, 0, r9, c1, c0, 0 @ control reg - bic r4, r4, #2 @ clear frequency change bit - stmia r0, {r4 - r9} @ store cp regs - ldmfd sp!, {r4 - r9, pc} -ENDPROC(cpu_xscale_do_suspend) - -ENTRY(cpu_xscale_do_resume) - ldmia r0, {r4 - r9} @ load cp regs - mov ip, #0 - mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB - mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. 
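
The erratum 40 fixup inside cpu_xscale_set_pte_ext above downgrades a user-readable, read-only, write-back page to write-through before the memory type is looked up. In C terms (the L_PTE_* bit values follow the 2-level layout but should be treated as illustrative constants):

#include <stdint.h>
#include <stdio.h>

#define L_PTE_MT_WRITETHROUGH (2u << 2)
#define L_PTE_MT_WRITEBACK    (3u << 2)
#define L_PTE_MT_MASK         (15u << 2)
#define L_PTE_RDONLY          (1u << 7)
#define L_PTE_USER            (1u << 8)

/* Masking out bit (4 << 2) first, as the assembly does, makes
 * write-alloc (111) compare equal to write-back (011), so write-alloc
 * user read-only pages are downgraded too. */
static uint32_t memtype(uint32_t pte)
{
    uint32_t key = pte & ((L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY)
                          & ~(4u << 2));

    if (key == (L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY))
        return L_PTE_MT_WRITETHROUGH;
    return pte & L_PTE_MT_MASK;
}

int main(void)
{
    printf("%#x\n", memtype(L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY));
    return 0; /* prints 0x8, i.e. write-through */
}
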
- mcr p15, 0, r5, c15, c1, 0 @ CP access reg - mcr p15, 0, r6, c13, c0, 0 @ PID - mcr p15, 0, r7, c3, c0, 0 @ domain ID - mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg - mov r0, r9 @ control register - b cpu_resume_mmu -ENDPROC(cpu_xscale_do_resume) -#endif - - .type __xscale_setup, #function -__xscale_setup: - mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs - mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde - orr r0, r0, #1 << 13 @ Its undefined whether this - mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes - - adr r5, xscale_crval - ldmia r5, {r5, r6} - mrc p15, 0, r0, c1, c0, 0 @ get control register - bic r0, r0, r5 - orr r0, r0, r6 - ret lr - .size __xscale_setup, . - __xscale_setup - - /* - * R - * .RVI ZFRS BLDP WCAM - * ..11 1.01 .... .101 - * - */ - .type xscale_crval, #object -xscale_crval: - crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900 - - __INITDATA - - @ define struct processor (see and proc-macros.S) - define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 - - .section ".rodata" - - string cpu_arch_name, "armv5te" - string cpu_elf_name, "v5" - - string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" - string cpu_80200_name, "XScale-80200" - string cpu_80219_name, "XScale-80219" - string cpu_8032x_name, "XScale-IOP8032x Family" - string cpu_8033x_name, "XScale-IOP8033x Family" - string cpu_pxa250_name, "XScale-PXA250" - string cpu_pxa210_name, "XScale-PXA210" - string cpu_ixp42x_name, "XScale-IXP42x Family" - string cpu_ixp43x_name, "XScale-IXP43x Family" - string cpu_ixp46x_name, "XScale-IXP46x Family" - string cpu_ixp2400_name, "XScale-IXP2400" - string cpu_ixp2800_name, "XScale-IXP2800" - string cpu_pxa255_name, "XScale-PXA255" - string cpu_pxa270_name, "XScale-PXA270" - - .align - - .section ".proc.info.init", #alloc - -.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache - .type __\name\()_proc_info,#object -__\name\()_proc_info: - .long \cpu_val - .long \cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - initfn __xscale_setup, __\name\()_proc_info - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long \cpu_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .ifb \cache - .long xscale_cache_fns - .else - .long \cache - .endif - .size __\name\()_proc_info, . 
- __\name\()_proc_info -.endm - - xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ - cache=xscale_80200_A0_A1_cache_fns - xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name - xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name - xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name - xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name - xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name - xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name - xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name - xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name - xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name - xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name - xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name - xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name - xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S deleted file mode 100644 index 769778928356e01e50d023f4924e3df57944999b..0000000000000000000000000000000000000000 --- a/arch/arm/mm/pv-fixup-asm.S +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 Russell King - * - * This assembly is required to safely remap the physical address space - * for Keystone 2 - */ -#include -#include -#include -#include -#include - - .section ".idmap.text", "ax" - -#define L1_ORDER 3 -#define L2_ORDER 3 - -ENTRY(lpae_pgtables_remap_asm) - stmfd sp!, {r4-r8, lr} - - mrc p15, 0, r8, c1, c0, 0 @ read control reg - bic ip, r8, #CR_M @ disable caches and MMU - mcr p15, 0, ip, c1, c0, 0 - dsb - isb - - /* Update level 2 entries covering the kernel */ - ldr r6, =(_end - 1) - add r7, r2, #0x1000 - add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER - add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) -1: ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L2_ORDER - cmp r7, r6 - bls 1b - - /* Update level 2 entries for the boot data */ - add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 - ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L2_ORDER - ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7] - - /* Update level 1 entries */ - mov r6, #4 - mov r7, r2 -2: ldrd r4, r5, [r7] - adds r4, r4, r0 - adc r5, r5, r1 - strd r4, r5, [r7], #1 << L1_ORDER - subs r6, r6, #1 - bne 2b - - mrrc p15, 0, r4, r5, c2 @ read TTBR0 - adds r4, r4, r0 @ update physical address - adc r5, r5, r1 - mcrr p15, 0, r4, r5, c2 @ write back TTBR0 - mrrc p15, 1, r4, r5, c2 @ read TTBR1 - adds r4, r4, r0 @ update physical address - adc r5, r5, r1 - mcrr p15, 1, r4, r5, c2 @ write back TTBR1 - - dsb - - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate - mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() - dsb - isb - - mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU - dsb - isb - - ldmfd sp!, {r4-r8, pc} -ENDPROC(lpae_pgtables_remap_asm) diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S deleted file mode 100644 index def6161ec4523d5579ba9f99e1be3a1589af0081..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-fa.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-fa.S - * - * Copyright (C) 2005 Faraday Corp. 
- * Copyright (C) 2008-2009 Paulius Zaleckas - * - * Based on tlb-v4wbi.S: - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4, Faraday variation. - * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) - * - * Processors: FA520 FA526 FA626 - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - -/* - * flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 4 -ENTRY(fa_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r3, c7, c10, 4 @ data write barrier - ret lr - - -ENTRY(fa_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r3, c7, c10, 4 @ data write barrier - mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S deleted file mode 100644 index b962b4e751584f7080c6ca4e55b61ed8b39dd622..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4.S +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 TLB handling functions. - * These assume a split I/D TLBs, and no write buffer. - * - * Processors: ARM720T - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - .align 5 -/* - * v4_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified user address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything -.v4_flush_kern_tlb_range: - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -/* - * v4_flush_kern_tlb_range(start, end) - * - * Invalidate a range of TLB entries in the specified kernel - * address range. - * - * - start - virtual address (may not be aligned) - * - end - virtual address (may not be aligned) - */ -.globl v4_flush_kern_tlb_range -.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S deleted file mode 100644 index 9348bba7586a0e6e8e256aa9ae511a6446548c67..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4wb.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4wb.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 TLB handling functions. 
- * These assume a split I/D TLBs w/o I TLB entry, with a write buffer. - * - * Processors: SA110 SA1100 SA1110 - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - - .align 5 -/* - * v4wb_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4wb_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? - retne lr @ no, we dont do anything - vma_vm_flags r2, r2 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - tst r2, #VM_EXEC - mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -/* - * v4_flush_kern_tlb_range(start, end) - * - * Invalidate a range of TLB entries in the specified kernel - * address range. - * - * - start - virtual address (may not be aligned) - * - end - virtual address (may not be aligned) - */ -ENTRY(v4wb_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 - mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB -1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4wb, v4wb_tlb_flags diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S deleted file mode 100644 index d4f9040a4111c180861de6feb7db275ad8a0a417..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v4wbi.S +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlbv4wbi.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 4 and version 5 TLB handling functions. - * These assume a split I/D TLBs, with a write buffer. - * - * Processors: ARM920 ARM922 ARM925 ARM926 XScale - */ -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * v4wb_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v4wbi_flush_user_tlb_range) - vma_vm_mm ip, r2 - act_mm r3 @ get current->active_mm - eors r3, ip, r3 @ == mm ? 
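Each of these flush_user_tlb_range implementations follows the same shape; in C it is roughly the sketch below. The types and tlb_inv_entry() helper are placeholders for the per-core MCR sequences, not real kernel interfaces.

    /* Sketch of the common user-range TLB flush pattern. */
    #define PAGE_SIZE 4096UL

    struct mm_struct;
    extern struct mm_struct *current_active_mm(void);
    extern void tlb_inv_entry(unsigned long mva);   /* one c8 MCR */

    static void flush_user_tlb_range_model(unsigned long start,
                                           unsigned long end,
                                           struct mm_struct *vma_mm)
    {
        if (vma_mm != current_active_mm())
            return;                 /* retne lr: not the live mm */

        start &= ~(PAGE_SIZE - 1);  /* the bic 0x0ff/0xf00 pair */
        while (start < end) {       /* cmp/blo loop */
            tlb_inv_entry(start);
            start += PAGE_SIZE;
        }
    }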
- retne lr @ no, we dont do anything - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - vma_vm_flags r2, r2 - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: tst r2, #VM_EXEC - mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry - mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - -ENTRY(v4wbi_flush_kern_tlb_range) - mov r3, #0 - mcr p15, 0, r3, c7, c10, 4 @ drain WB - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry - mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - ret lr - - __INITDATA - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v4wbi, v4wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S deleted file mode 100644 index 5335b9687297fa476f9b0d91fff1f8241d0e69af..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v6.S +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-v6.S - * - * Copyright (C) 1997-2002 Russell King - * - * ARM architecture version 6 TLB handling functions. - * These assume a split I/D TLB. - */ -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -#define HARVARD_TLB - -/* - * v6wbi_flush_user_tlb_range(start, end, vma) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_struct describing address range - * - * It is assumed that: - * - the "Invalidate single entry" instruction will invalidate - * both the I and the D TLBs on Harvard-style TLBs - */ -ENTRY(v6wbi_flush_user_tlb_range) - vma_vm_mm r3, r2 @ get vma->vm_mm - mov ip, #0 - mmid r3, r3 @ get vm_mm->context.id - mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - asid r3, r3 @ mask ASID - orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA - mov r1, r1, lsl #PAGE_SHIFT - vma_vm_flags r2, r2 @ get vma->vm_flags -1: -#ifdef HARVARD_TLB - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) - tst r2, #VM_EXEC @ Executable area ? 
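On ARMv6 the invalidate-by-MVA operand carries the ASID in its low bits; the lsr/lsl/orr sequence above builds it as in this sketch (an 8-bit ASID field is assumed here):

    /* Sketch of the MVA+ASID operand construction. */
    #define PAGE_SHIFT 12
    #define ASID_MASK  0xffUL   /* assumed ASID width for the sketch */

    static unsigned long mva_asid(unsigned long va, unsigned long asid)
    {
        return ((va >> PAGE_SHIFT) << PAGE_SHIFT)   /* page-align VA */
             | (asid & ASID_MASK);                  /* tag with ASID */
    }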
- mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA (was 1) -#endif - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier - ret lr - -/* - * v6wbi_flush_kern_tlb_range(start,end) - * - * Invalidate a range of kernel TLB entries - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - */ -ENTRY(v6wbi_flush_kern_tlb_range) - mov r2, #0 - mcr p15, 0, r2, c7, c10, 4 @ drain write buffer - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - mov r0, r0, lsl #PAGE_SHIFT - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef HARVARD_TLB - mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA - mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA -#else - mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA -#endif - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier - mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) - ret lr - - __INIT - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v6wbi, v6wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S deleted file mode 100644 index 1bb28d7db5670551d22035c785544066fbeb1e2c..0000000000000000000000000000000000000000 --- a/arch/arm/mm/tlb-v7.S +++ /dev/null @@ -1,92 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/mm/tlb-v7.S - * - * Copyright (C) 1997-2002 Russell King - * Modified for ARMv7 by Catalin Marinas - * - * ARM architecture version 6 TLB handling functions. - * These assume a split I/D TLB. - */ -#include -#include -#include -#include -#include -#include -#include "proc-macros.S" - -/* - * v7wbi_flush_user_tlb_range(start, end, vma) - * - * Invalidate a range of TLB entries in the specified address space. 
- * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - vma - vma_struct describing address range - * - * It is assumed that: - * - the "Invalidate single entry" instruction will invalidate - * both the I and the D TLBs on Harvard-style TLBs - */ -ENTRY(v7wbi_flush_user_tlb_range) - vma_vm_mm r3, r2 @ get vma->vm_mm - mmid r3, r3 @ get vm_mm->context.id - dsb ish - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - asid r3, r3 @ mask ASID -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(W(mov) r3, #0 ) - ALT_UP(W(nop) ) -#endif - orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) -#else - ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) -#endif - ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA - - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - dsb ish - ret lr -ENDPROC(v7wbi_flush_user_tlb_range) - -/* - * v7wbi_flush_kern_tlb_range(start,end) - * - * Invalidate a range of kernel TLB entries - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - */ -ENTRY(v7wbi_flush_kern_tlb_range) - dsb ish - mov r0, r0, lsr #PAGE_SHIFT @ align address - mov r1, r1, lsr #PAGE_SHIFT - mov r0, r0, lsl #PAGE_SHIFT - mov r1, r1, lsl #PAGE_SHIFT -1: -#ifdef CONFIG_ARM_ERRATA_720789 - ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) -#else - ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) -#endif - ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - dsb ish - isb - ret lr -ENDPROC(v7wbi_flush_kern_tlb_range) - - __INIT - - /* define struct cpu_tlb_fns (see and proc-macros.S) */ - define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S deleted file mode 100644 index d8f9915566e1577334c334fd4a348d3dcce9df71..0000000000000000000000000000000000000000 --- a/arch/arm/nwfpe/entry.S +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - NetWinder Floating Point Emulator - (c) Rebel.COM, 1998 - (c) 1998, 1999 Philip Blundell - - Direct questions, comments to Scott Bambrough - -*/ -#include -#include - -/* This is the kernel's entry point into the floating point emulator. -It is called from the kernel with code similar to this: - - sub r4, r5, #4 - ldrt r0, [r4] @ r0 = instruction - adrsvc al, r9, ret_from_exception @ r9 = normal FP return - adrsvc al, lr, fpundefinstr @ lr = undefined instr return - - get_current_task r10 - mov r8, #1 - strb r8, [r10, #TSK_USED_MATH] @ set current->used_math - add r10, r10, #TSS_FPESAVE @ r10 = workspace - ldr r4, .LC2 - ldr pc, [r4] @ Call FP emulator entry point - -The kernel expects the emulator to return via one of two possible -points of return it passes to the emulator. The emulator, if -successful in its emulation, jumps to ret_from_exception (passed in -r9) and the kernel takes care of returning control from the trap to -the user code. If the emulator is unable to emulate the instruction, -it returns via _fpundefinstr (passed via lr) and the kernel halts the -user program with a core dump. - -On entry to the emulator r10 points to an area of private FP workspace -reserved in the thread structure for this process. 
This is where the -emulator saves its registers across calls. The first word of this area -is used as a flag to detect the first time a process uses floating point, -so that the emulator startup cost can be avoided for tasks that don't -want it. - -This routine does three things: - -1) The kernel has created a struct pt_regs on the stack and saved the -user registers into it. See /usr/include/asm/proc/ptrace.h for details. - -2) It calls EmulateAll to emulate a floating point instruction. -EmulateAll returns 1 if the emulation was successful, or 0 if not. - -3) If an instruction has been emulated successfully, it looks ahead at -the next instruction. If it is a floating point instruction, it -executes the instruction, without returning to user space. In this -way it repeatedly looks ahead and executes floating point instructions -until it encounters a non floating point instruction, at which time it -returns via _fpreturn. - -This is done to reduce the effect of the trap overhead on each -floating point instructions. GCC attempts to group floating point -instructions to allow the emulator to spread the cost of the trap over -several floating point instructions. */ - -#include - - .globl nwfpe_enter -nwfpe_enter: - mov r4, lr @ save the failure-return addresses - mov sl, sp @ we access the registers via 'sl' - - ldr r5, [sp, #S_PC] @ get contents of PC; - mov r6, r0 @ save the opcode -emulate: - ldr r1, [sp, #S_PSR] @ fetch the PSR - bl arm_check_condition @ check the condition - cmp r0, #ARM_OPCODE_CONDTEST_PASS @ condition passed? - - @ if condition code failed to match, next insn - bne next @ get the next instruction; - - mov r0, r6 @ prepare for EmulateAll() - bl EmulateAll @ emulate the instruction - cmp r0, #0 @ was emulation successful - reteq r4 @ no, return failure - -next: - uaccess_enable r3 -.Lx1: ldrt r6, [r5], #4 @ get the next instruction and - @ increment PC - uaccess_disable r3 - and r2, r6, #0x0F000000 @ test for FP insns - teq r2, #0x0C000000 - teqne r2, #0x0D000000 - teqne r2, #0x0E000000 - retne r9 @ return ok if not a fp insn - - str r5, [sp, #S_PC] @ update PC copy in regs - - mov r0, r6 @ save a copy - b emulate @ check condition and emulate - - @ We need to be prepared for the instructions at .Lx1 and .Lx2 - @ to fault. Emit the appropriate exception gunk to fix things up. - @ ??? For some reason, faults can happen at .Lx2 even with a - @ plain LDR instruction. Weird, but it seems harmless. - .pushsection .text.fixup,"ax" - .align 2 -.Lfix: ret r9 @ let the user eat segfaults - .popsection - - .pushsection __ex_table,"a" - .align 3 - .long .Lx1, .Lfix - .popsection diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S deleted file mode 100644 index 09d9fc30c8cabce736a89fe026539454b971a551..0000000000000000000000000000000000000000 --- a/arch/arm/plat-versatile/headsmp.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/plat-versatile/headsmp.S - * - * Copyright (c) 2003 ARM Limited - * All Rights Reserved - */ -#include -#include -#include - -/* - * Realview/Versatile Express specific entry point for secondary CPUs. - * This provides a "holding pen" into which all secondary cores are held - * until we're ready for them to initialise. 
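The look-ahead behaviour described in points 2 and 3 can be restated as C pseudocode. EmulateAll is the real entry point named above; the other names are descriptive placeholders for the ldrt fetch and the coprocessor-number test in the assembly.

    /* Sketch of the nwfpe_enter look-ahead loop. */
    typedef unsigned int u32;

    extern int EmulateAll(u32 insn);          /* 1 = emulated OK */
    extern u32 load_next_user_insn(void);     /* the ldrt at .Lx1 */
    extern int is_fp_insn(u32 insn);          /* coproc 0xC/0xD/0xE test */

    static void nwfpe_loop_model(u32 insn)
    {
        for (;;) {
            if (!EmulateAll(insn))
                break;                  /* bounce: undefined instruction */
            insn = load_next_user_insn();
            if (!is_fp_insn(insn))
                break;                  /* return to user code via r9 */
            /* else emulate the next FP insn without re-trapping */
        }
    }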
- */ -ENTRY(versatile_secondary_startup) - ARM_BE8(setend be) - mrc p15, 0, r0, c0, c0, 5 - bic r0, #0xff000000 - adr r4, 1f - ldmia r4, {r5, r6} - sub r4, r4, r5 - add r6, r6, r4 -pen: ldr r7, [r6] - cmp r7, r0 - bne pen - - /* - * we've been released from the holding pen: secondary_stack - * should now contain the SVC stack for this core - */ - b secondary_startup - - .align -1: .long . - .long versatile_cpu_release -ENDPROC(versatile_secondary_startup) diff --git a/arch/arm/vdso/datapage.S b/arch/arm/vdso/datapage.S deleted file mode 100644 index 9cd73b725d9fb89c2b9bb1d1178cce705baff38b..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/datapage.S +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - - .align 2 -.L_vdso_data_ptr: - .long _start - . - VDSO_DATA_SIZE - -ENTRY(__get_datapage) - .fnstart - adr r0, .L_vdso_data_ptr - ldr r1, [r0] - add r0, r0, r1 - bx lr - .fnend -ENDPROC(__get_datapage) diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S deleted file mode 100644 index 65f2e6f863baf1d2b6f992a9d8474290acb0d1ac..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/vdso.S +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Adapted from arm64 version. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include -#include -#include - - .globl vdso_start, vdso_end - .section .data..ro_after_init - .balign PAGE_SIZE -vdso_start: - .incbin "arch/arm/vdso/vdso.so" - .balign PAGE_SIZE -vdso_end: - - .previous diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S deleted file mode 100644 index 73cf205b003ea94acf3ca1b4bddd35945b70db7e..0000000000000000000000000000000000000000 --- a/arch/arm/vdso/vdso.lds.S +++ /dev/null @@ -1,76 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Adapted from arm64 version. - * - * GNU linker script for the VDSO library. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * Heavily based on the vDSO linker scripts for other archs. - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") -OUTPUT_ARCH(arm) - -SECTIONS -{ - PROVIDE(_start = .); - - . = SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - .text : { *(.text*) } :text =0xe7f001f2 - - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. 
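Stepping back to versatile_secondary_startup above: the holding pen is, in effect, the spin loop below. This is a sketch; versatile_cpu_release is the real symbol the assembly polls, while the other helpers are placeholders for the MPIDR read and the PC-relative address fixup.

    /* Sketch of the secondary-CPU holding pen. */
    extern volatile unsigned long versatile_cpu_release;
    extern unsigned long this_cpu_id(void);      /* MPIDR low byte */
    extern void secondary_startup(void);

    static void holding_pen_model(void)
    {
        unsigned long cpu = this_cpu_id();

        while (versatile_cpu_release != cpu)
            ;                            /* spin until released */
        secondary_startup();
    }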
- */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -VERSION -{ - LINUX_2.6 { - global: - __vdso_clock_gettime; - __vdso_gettimeofday; - local: *; - }; -} diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S deleted file mode 100644 index 27b0a1f27fbdf392e882d049045fd8102fe31b6e..0000000000000000000000000000000000000000 --- a/arch/arm/vfp/entry.S +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/vfp/entry.S - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - */ -#include -#include -#include -#include -#include -#include - -@ VFP entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r2 = PC value to resume execution after successful emulation -@ r9 = normal "successful" return address -@ r10 = this threads thread_info structure -@ lr = unrecognised instruction return address -@ IRQs enabled. -@ -ENTRY(do_vfp) - inc_preempt_count r10, r4 - ldr r4, .LCvfp - ldr r11, [r10, #TI_CPU] @ CPU number - add r10, r10, #TI_VFPSTATE @ r10 = workspace - ldr pc, [r4] @ call VFP entry point -ENDPROC(do_vfp) - -ENTRY(vfp_null_entry) - dec_preempt_count_ti r10, r4 - ret lr -ENDPROC(vfp_null_entry) - - .align 2 -.LCvfp: - .word vfp_vector diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S deleted file mode 100644 index b530db8f2c6c8bea795902c13e72aa0e05a43b31..0000000000000000000000000000000000000000 --- a/arch/arm/vfp/vfphw.S +++ /dev/null @@ -1,315 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm/vfp/vfphw.S - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - * - * This code is called from the kernel's undefined instruction trap. - * r9 holds the return address for successful handling. - * lr holds the return address for unrecognised instructions. - * r10 points at the start of the private FP workspace in the thread structure - * sp points to a struct pt_regs (as defined in include/asm/proc/ptrace.h) - */ -#include -#include -#include -#include -#include -#include -#include - - .macro DBGSTR, str -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - .macro DBGSTR1, str, arg -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r1, \arg - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - .macro DBGSTR3, str, arg1, arg2, arg3 -#ifdef DEBUG - stmfd sp!, {r0-r3, ip, lr} - mov r3, \arg3 - mov r2, \arg2 - mov r1, \arg1 - ldr r0, =1f - bl printk - ldmfd sp!, {r0-r3, ip, lr} - - .pushsection .rodata, "a" -1: .ascii KERN_DEBUG "VFP: \str\n" - .byte 0 - .previous -#endif - .endm - - -@ VFP hardware support entry point. -@ -@ r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) -@ r2 = PC value to resume execution after successful emulation -@ r9 = normal "successful" return address -@ r10 = vfp_state union -@ r11 = CPU number -@ lr = unrecognised instruction return address -@ IRQs enabled. -ENTRY(vfp_support_entry) - DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 - - VFPFMRX r1, FPEXC @ Is the VFP enabled? 
- DBGSTR1 "fpexc %08x", r1 - tst r1, #FPEXC_EN - bne look_for_VFP_exceptions @ VFP is already enabled - - DBGSTR1 "enable %x", r10 - ldr r3, vfp_current_hw_state_address - orr r1, r1, #FPEXC_EN @ user FPEXC has the enable bit set - ldr r4, [r3, r11, lsl #2] @ vfp_current_hw_state pointer - bic r5, r1, #FPEXC_EX @ make sure exceptions are disabled - cmp r4, r10 @ this thread owns the hw context? -#ifndef CONFIG_SMP - @ For UP, checking that this thread owns the hw context is - @ sufficient to determine that the hardware state is valid. - beq vfp_hw_state_valid - - @ On UP, we lazily save the VFP context. As a different - @ thread wants ownership of the VFP hardware, save the old - @ state if there was a previous (valid) owner. - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it - - DBGSTR1 "save old state %p", r4 - cmp r4, #0 @ if the vfp_current_hw_state is NULL - beq vfp_reload_hw @ then the hw state needs reloading - VFPFSTMIA r4, r5 @ save the working registers - VFPFMRX r5, FPSCR @ current status -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r6, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r8, FPINST2 @ FPINST2 if needed (and present) -1: -#endif - stmia r4, {r1, r5, r6, r8} @ save FPEXC, FPSCR, FPINST, FPINST2 -vfp_reload_hw: - -#else - @ For SMP, if this thread does not own the hw context, then we - @ need to reload it. No need to save the old state as on SMP, - @ we always save the state when we switch away from a thread. - bne vfp_reload_hw - - @ This thread has ownership of the current hardware context. - @ However, it may have been migrated to another CPU, in which - @ case the saved state is newer than the hardware context. - @ Check this by looking at the CPU number which the state was - @ last loaded onto. - ldr ip, [r10, #VFP_CPU] - teq ip, r11 - beq vfp_hw_state_valid - -vfp_reload_hw: - @ We're loading this threads state into the VFP hardware. Update - @ the CPU number which contains the most up to date VFP context. - str r11, [r10, #VFP_CPU] - - VFPFMXR FPEXC, r5 @ enable VFP, disable any pending - @ exceptions, so we can get at the - @ rest of it -#endif - - DBGSTR1 "load state %p", r10 - str r10, [r3, r11, lsl #2] @ update the vfp_current_hw_state pointer - @ Load the saved state back into the VFP - VFPFLDMIA r10, r5 @ reload the working registers while - @ FPEXC is in a safe state - ldmia r10, {r1, r5, r6, r8} @ load FPEXC, FPSCR, FPINST, FPINST2 -#ifndef CONFIG_CPU_FEROCEON - tst r1, #FPEXC_EX @ is there additional state to restore? - beq 1f - VFPFMXR FPINST, r6 @ restore FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to write? - beq 1f - VFPFMXR FPINST2, r8 @ FPINST2 if needed (and present) -1: -#endif - VFPFMXR FPSCR, r5 @ restore status - -@ The context stored in the VFP hardware is up to date with this thread -vfp_hw_state_valid: - tst r1, #FPEXC_EX - bne process_exception @ might as well handle the pending - @ exception before retrying branch - @ out before setting an FPEXC that - @ stops us reading stuff - VFPFMXR FPEXC, r1 @ Restore FPEXC last - sub r2, r2, #4 @ Retry current instruction - if Thumb - str r2, [sp, #S_PC] @ mode it's two 16-bit instructions, - @ else it's one 32-bit instruction, so - @ always subtract 4 from the following - @ instruction address. 
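The UP ownership logic above collapses to a short decision tree in C. This is a sketch only: the array and helpers are illustrative stand-ins for the vfp_current_hw_state pointer array and the VFPFSTMIA/VFPFLDMIA sequences.

    /* Sketch of the UP lazy VFP context switch. */
    struct vfp_state;
    extern struct vfp_state *vfp_current_hw_state[];  /* per CPU */
    extern void vfp_save_hw(struct vfp_state *old);
    extern void vfp_load_hw(struct vfp_state *new_state);

    static void vfp_up_entry_model(struct vfp_state *thread_state, int cpu)
    {
        struct vfp_state *owner = vfp_current_hw_state[cpu];

        if (owner == thread_state)
            return;                     /* vfp_hw_state_valid */
        if (owner)
            vfp_save_hw(owner);         /* lazily save previous owner */
        vfp_load_hw(thread_state);      /* vfp_reload_hw */
        vfp_current_hw_state[cpu] = thread_state;
    }

On SMP the same reload happens whenever the ownership check or the saved VFP_CPU number fails to match, and the save step is skipped because state is always saved when switching away from a thread.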
- dec_preempt_count_ti r10, r4 - ret r9 @ we think we have handled things - - -look_for_VFP_exceptions: - @ Check for synchronous or asynchronous exception - tst r1, #FPEXC_EX | FPEXC_DEX - bne process_exception - @ On some implementations of the VFP subarch 1, setting FPSCR.IXE - @ causes all the CDP instructions to be bounced synchronously without - @ setting the FPEXC.EX bit - VFPFMRX r5, FPSCR - tst r5, #FPSCR_IXE - bne process_exception - - tst r5, #FPSCR_LENGTH_MASK - beq skip - orr r1, r1, #FPEXC_DEX - b process_exception -skip: - - @ Fall into hand on to next handler - appropriate coproc instr - @ not recognised by VFP - - DBGSTR "not VFP" - dec_preempt_count_ti r10, r4 - ret lr - -process_exception: - DBGSTR "bounce" - mov r2, sp @ nothing stacked - regdump is at TOS - mov lr, r9 @ setup for a return to the user code. - - @ Now call the C code to package up the bounce to the support code - @ r0 holds the trigger instruction - @ r1 holds the FPEXC value - @ r2 pointer to register dump - b VFP_bounce @ we have handled this - the support - @ code will raise an exception if - @ required. If not, the user code will - @ retry the faulted instruction -ENDPROC(vfp_support_entry) - -ENTRY(vfp_save_state) - @ Save the current VFP state - @ r0 - save location - @ r1 - FPEXC - DBGSTR1 "save VFP state %p", r0 - VFPFSTMIA r0, r2 @ save the working registers - VFPFMRX r2, FPSCR @ current status - tst r1, #FPEXC_EX @ is there additional state to save? - beq 1f - VFPFMRX r3, FPINST @ FPINST (only if FPEXC.EX is set) - tst r1, #FPEXC_FP2V @ is there an FPINST2 to read? - beq 1f - VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present) -1: - stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 - ret lr -ENDPROC(vfp_save_state) - - .align -vfp_current_hw_state_address: - .word vfp_current_hw_state - - .macro tbl_branch, base, tmp, shift -#ifdef CONFIG_THUMB2_KERNEL - adr \tmp, 1f - add \tmp, \tmp, \base, lsl \shift - ret \tmp -#else - add pc, pc, \base, lsl \shift - mov r0, r0 -#endif -1: - .endm - -ENTRY(vfp_get_float) - tbl_branch r0, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 - ret lr - .org 1b + 8 -1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 - ret lr - .org 1b + 8 - .endr -ENDPROC(vfp_get_float) - -ENTRY(vfp_put_float) - tbl_branch r1, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 - ret lr - .org 1b + 8 -1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 - ret lr - .org 1b + 8 - .endr -ENDPROC(vfp_put_float) - -ENTRY(vfp_get_double) - tbl_branch r0, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmrrd r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#ifdef CONFIG_VFPv3 - @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#endif - - @ virtual register 16 (or 32 if VFPv3) for compare with zero - mov r0, #0 - mov r1, #0 - ret lr -ENDPROC(vfp_get_double) - -ENTRY(vfp_put_double) - tbl_branch r2, r3, #3 - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmdrr d\dr, r0, r1 - ret lr - .org 1b + 8 - .endr -#ifdef CONFIG_VFPv3 - @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr - ret lr - .org 1b + 8 - .endr -#endif -ENDPROC(vfp_put_double) diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S deleted file mode 100644 index 
b11bba542faccc4889abe8b5571b583e83ee0479..0000000000000000000000000000000000000000 --- a/arch/arm/xen/hypercall.S +++ /dev/null @@ -1,122 +0,0 @@ -/****************************************************************************** - * hypercall.S - * - * Xen hypercall wrappers - * - * Stefano Stabellini , Citrix, 2012 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * The Xen hypercall calling convention is very similar to the ARM - * procedure calling convention: the first paramter is passed in r0, the - * second in r1, the third in r2 and the fourth in r3. Considering that - * Xen hypercalls have 5 arguments at most, the fifth paramter is passed - * in r4, differently from the procedure calling convention of using the - * stack for that case. - * - * The hypercall number is passed in r12. - * - * The return value is in r0. - * - * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM - * hypercall tag. - */ - -#include -#include -#include -#include - - -#define XEN_IMM 0xEA1 - -#define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - mov r12, #__HYPERVISOR_##hypercall; \ - __HVC(XEN_IMM); \ - ret lr; \ -ENDPROC(HYPERVISOR_##hypercall) - -#define HYPERCALL0 HYPERCALL_SIMPLE -#define HYPERCALL1 HYPERCALL_SIMPLE -#define HYPERCALL2 HYPERCALL_SIMPLE -#define HYPERCALL3 HYPERCALL_SIMPLE -#define HYPERCALL4 HYPERCALL_SIMPLE - -#define HYPERCALL5(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - stmdb sp!, {r4} \ - ldr r4, [sp, #4] \ - mov r12, #__HYPERVISOR_##hypercall; \ - __HVC(XEN_IMM); \ - ldm sp!, {r4} \ - ret lr \ -ENDPROC(HYPERVISOR_##hypercall) - - .text - -HYPERCALL2(xen_version); -HYPERCALL3(console_io); -HYPERCALL3(grant_table_op); -HYPERCALL2(sched_op); -HYPERCALL2(event_channel_op); -HYPERCALL2(hvm_op); -HYPERCALL2(memory_op); -HYPERCALL2(physdev_op); -HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); -HYPERCALL1(platform_op_raw); -HYPERCALL2(multicall); -HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); - -ENTRY(privcmd_call) - stmdb sp!, {r4} - mov r12, r0 - mov r0, r1 - mov r1, r2 - mov r2, r3 - ldr r3, [sp, #8] - /* - * Privcmd calls are issued by the userspace. 
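The calling convention described at the top of this file can be mimicked from C with register-pinned variables. The sketch below is ARM-only and assumes a GCC-style compiler whose assembler accepts the hvc mnemonic; it is what each HYPERCALL_SIMPLE wrapper does for a two-argument call.

    /* Sketch of a two-argument Xen hypercall from C. */
    static long xen_hypercall2_model(unsigned int nr,
                                     unsigned long a1, unsigned long a2)
    {
        register unsigned long r0 asm("r0") = a1;   /* arg 1 / result */
        register unsigned long r1 asm("r1") = a2;   /* arg 2 */
        register unsigned long r12 asm("r12") = nr; /* hypercall number */

        asm volatile("hvc #0xEA1"                   /* XEN_IMM tag */
                     : "+r" (r0)
                     : "r" (r1), "r" (r12)
                     : "memory");
        return r0;                                  /* return value */
    }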
We need to allow the - * kernel to access the userspace memory before issuing the hypercall. - */ - uaccess_enable r4 - - /* r4 is loaded now as we use it as scratch register before */ - ldr r4, [sp, #4] - __HVC(XEN_IMM) - - /* - * Disable userspace access from kernel. This is fine to do it - * unconditionally as no set_fs(KERNEL_DS) is called before. - */ - uaccess_disable r4 - - ldm sp!, {r4} - ret lr -ENDPROC(privcmd_call); diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi deleted file mode 120000 index 68fd0f8f1dee8e9b7ff0c0229c888d1ec1d03b05..0000000000000000000000000000000000000000 --- a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi +++ /dev/null @@ -1 +0,0 @@ -../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi new file mode 100644 index 0000000000000000000000000000000000000000..68fd0f8f1dee8e9b7ff0c0229c888d1ec1d03b05 --- /dev/null +++ b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi \ No newline at end of file diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S deleted file mode 100644 index 9add9bbc48d8b84a9454f6b29db41d403a47125f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ /dev/null @@ -1,221 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - /* - * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, - * u32 *macp, u8 const rk[], u32 rounds); - */ -ENTRY(ce_aes_ccm_auth_data) - ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.16b}, [x0] /* load mac */ - cbz w8, 1f - sub w8, w8, #16 - eor v1.16b, v1.16b, v1.16b -0: ldrb w7, [x1], #1 /* get 1 byte of input */ - subs w2, w2, #1 - add w8, w8, #1 - ins v1.b[0], w7 - ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ - beq 8f /* out of input? */ - cbnz w8, 0b - eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.4s}, [x4] /* load first round key */ - prfm pldl1strm, [x1] - cmp w5, #12 /* which key size? */ - add x6, x4, #16 - sub w7, w5, #2 /* modified # of rounds */ - bmi 2f - bne 5f - mov v5.16b, v3.16b - b 4f -2: mov v4.16b, v3.16b - ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ -3: aese v0.16b, v4.16b - aesmc v0.16b, v0.16b -4: ld1 {v3.4s}, [x6], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b -5: ld1 {v4.4s}, [x6], #16 /* load next round key */ - subs w7, w7, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - ld1 {v5.4s}, [x6], #16 /* load next round key */ - bpl 3b - aese v0.16b, v4.16b - subs w2, w2, #16 /* last data? 
*/ - eor v0.16b, v0.16b, v5.16b /* final round */ - bmi 6f - ld1 {v1.16b}, [x1], #16 /* load next input block */ - eor v0.16b, v0.16b, v1.16b /* xor with mac */ - bne 1b -6: st1 {v0.16b}, [x0] /* store mac */ - beq 10f - adds w2, w2, #16 - beq 10f - mov w8, w2 -7: ldrb w7, [x1], #1 - umov w6, v0.b[0] - eor w6, w6, w7 - strb w6, [x0], #1 - subs w2, w2, #1 - beq 10f - ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ - b 7b -8: cbz w8, 91f - mov w7, w8 - add w8, w8, #16 -9: ext v1.16b, v1.16b, v1.16b, #1 - adds w7, w7, #1 - bne 9b -91: eor v0.16b, v0.16b, v1.16b - st1 {v0.16b}, [x0] -10: str w8, [x3] - ret -ENDPROC(ce_aes_ccm_auth_data) - - /* - * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], - * u32 rounds); - */ -ENTRY(ce_aes_ccm_final) - ld1 {v3.4s}, [x2], #16 /* load first round key */ - ld1 {v0.16b}, [x0] /* load mac */ - cmp w3, #12 /* which key size? */ - sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.16b}, [x1] /* load 1st ctriv */ - bmi 0f - bne 3f - mov v5.16b, v3.16b - b 2f -0: mov v4.16b, v3.16b -1: ld1 {v5.4s}, [x2], #16 /* load next round key */ - aese v0.16b, v4.16b - aesmc v0.16b, v0.16b - aese v1.16b, v4.16b - aesmc v1.16b, v1.16b -2: ld1 {v3.4s}, [x2], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b - aese v1.16b, v5.16b - aesmc v1.16b, v1.16b -3: ld1 {v4.4s}, [x2], #16 /* load next round key */ - subs w3, w3, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - aese v1.16b, v3.16b - aesmc v1.16b, v1.16b - bpl 1b - aese v0.16b, v4.16b - aese v1.16b, v4.16b - /* final round key cancels out */ - eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.16b}, [x0] /* store result */ - ret -ENDPROC(ce_aes_ccm_final) - - .macro aes_ccm_do_crypt,enc - ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.16b}, [x5] /* load mac */ -CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ -0: /* outer loop */ - ld1 {v1.8b}, [x6] /* load upper ctr */ - prfm pldl1strm, [x1] - add x8, x8, #1 - rev x9, x8 - cmp w4, #12 /* which key size? */ - sub w7, w4, #2 /* get modified # of rounds */ - ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.4s}, [x3] /* load first round key */ - add x10, x3, #16 - bmi 1f - bne 4f - mov v5.16b, v3.16b - b 3f -1: mov v4.16b, v3.16b - ld1 {v5.4s}, [x10], #16 /* load 2nd round key */ -2: /* inner loop: 3 rounds, 2x interleaved */ - aese v0.16b, v4.16b - aesmc v0.16b, v0.16b - aese v1.16b, v4.16b - aesmc v1.16b, v1.16b -3: ld1 {v3.4s}, [x10], #16 /* load next round key */ - aese v0.16b, v5.16b - aesmc v0.16b, v0.16b - aese v1.16b, v5.16b - aesmc v1.16b, v1.16b -4: ld1 {v4.4s}, [x10], #16 /* load next round key */ - subs w7, w7, #3 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b - aese v1.16b, v3.16b - aesmc v1.16b, v1.16b - ld1 {v5.4s}, [x10], #16 /* load next round key */ - bpl 2b - aese v0.16b, v4.16b - aese v1.16b, v4.16b - subs w2, w2, #16 - bmi 6f /* partial block? 
*/ - ld1 {v2.16b}, [x1], #16 /* load next input block */ - .if \enc == 1 - eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ - eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ - .else - eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ - eor v1.16b, v2.16b, v5.16b /* final round enc */ - .endif - eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ - st1 {v1.16b}, [x0], #16 /* write output block */ - bne 0b -CPU_LE( rev x8, x8 ) - st1 {v0.16b}, [x5] /* store mac */ - str x8, [x6, #8] /* store lsb end of ctr (BE) */ -5: ret - -6: eor v0.16b, v0.16b, v5.16b /* final round mac */ - eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.16b}, [x5] /* store mac */ - add w2, w2, #16 /* process partial tail block */ -7: ldrb w9, [x1], #1 /* get 1 byte of input */ - umov w6, v1.b[0] /* get top crypted ctr byte */ - umov w7, v0.b[0] /* get top mac byte */ - .if \enc == 1 - eor w7, w7, w9 - eor w9, w9, w6 - .else - eor w9, w9, w6 - eor w7, w7, w9 - .endif - strb w9, [x0], #1 /* store out byte */ - strb w7, [x5], #1 /* store mac byte */ - subs w2, w2, #1 - beq 5b - ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ - ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ - b 7b - .endm - - /* - * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, - * u8 const rk[], u32 rounds, u8 mac[], - * u8 ctr[]); - * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, - * u8 const rk[], u32 rounds, u8 mac[], - * u8 ctr[]); - */ -ENTRY(ce_aes_ccm_encrypt) - aes_ccm_do_crypt 1 -ENDPROC(ce_aes_ccm_encrypt) - -ENTRY(ce_aes_ccm_decrypt) - aes_ccm_do_crypt 0 -ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S deleted file mode 100644 index 76a30fe4ba8b140ecfa0fc2e1feb5f3cc94fc19f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce-core.S +++ /dev/null @@ -1,84 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - - .arch armv8-a+crypto - -ENTRY(__aes_ce_encrypt) - sub w3, w3, #2 - ld1 {v0.16b}, [x2] - ld1 {v1.4s}, [x0], #16 - cmp w3, #10 - bmi 0f - bne 3f - mov v3.16b, v1.16b - b 2f -0: mov v2.16b, v1.16b - ld1 {v3.4s}, [x0], #16 -1: aese v0.16b, v2.16b - aesmc v0.16b, v0.16b -2: ld1 {v1.4s}, [x0], #16 - aese v0.16b, v3.16b - aesmc v0.16b, v0.16b -3: ld1 {v2.4s}, [x0], #16 - subs w3, w3, #3 - aese v0.16b, v1.16b - aesmc v0.16b, v0.16b - ld1 {v3.4s}, [x0], #16 - bpl 1b - aese v0.16b, v2.16b - eor v0.16b, v0.16b, v3.16b - st1 {v0.16b}, [x1] - ret -ENDPROC(__aes_ce_encrypt) - -ENTRY(__aes_ce_decrypt) - sub w3, w3, #2 - ld1 {v0.16b}, [x2] - ld1 {v1.4s}, [x0], #16 - cmp w3, #10 - bmi 0f - bne 3f - mov v3.16b, v1.16b - b 2f -0: mov v2.16b, v1.16b - ld1 {v3.4s}, [x0], #16 -1: aesd v0.16b, v2.16b - aesimc v0.16b, v0.16b -2: ld1 {v1.4s}, [x0], #16 - aesd v0.16b, v3.16b - aesimc v0.16b, v0.16b -3: ld1 {v2.4s}, [x0], #16 - subs w3, w3, #3 - aesd v0.16b, v1.16b - aesimc v0.16b, v0.16b - ld1 {v3.4s}, [x0], #16 - bpl 1b - aesd v0.16b, v2.16b - eor v0.16b, v0.16b, v3.16b - st1 {v0.16b}, [x1] - ret -ENDPROC(__aes_ce_decrypt) - -/* - * __aes_ce_sub() - use the aese instruction to perform the AES sbox - * substitution on each byte in 'input' - */ -ENTRY(__aes_ce_sub) - dup v1.4s, w0 - movi v0.16b, #0 - aese v0.16b, v1.16b - umov w0, v0.s[0] - ret -ENDPROC(__aes_ce_sub) - -ENTRY(__aes_ce_invert) - ld1 {v0.4s}, [x1] - aesimc v1.16b, v0.16b - st1 {v1.4s}, [x0] - ret -ENDPROC(__aes_ce_invert) diff --git a/arch/arm64/crypto/aes-ce.S 
b/arch/arm64/crypto/aes-ce.S deleted file mode 100644 index c132c49c89a8c4427fd2a252ddd4874a5ad44dfa..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-ce.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with - * Crypto Extensions - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -#include -#include - -#define AES_ENTRY(func) ENTRY(ce_ ## func) -#define AES_ENDPROC(func) ENDPROC(ce_ ## func) - - .arch armv8-a+crypto - - xtsmask .req v16 - cbciv .req v16 - vctr .req v16 - - .macro xts_reload_mask, tmp - .endm - - .macro xts_cts_skip_tw, reg, lbl - .endm - - /* preload all round keys */ - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] - .endm - - /* prepare for encryption with key in rk[] */ - .macro enc_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - /* prepare for encryption (again) but with new key in rk[] */ - .macro enc_switch_key, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - /* prepare for decryption with key in rk[] */ - .macro dec_prepare, rounds, rk, temp - mov \temp, \rk - load_round_keys \rounds, \temp - .endm - - .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 - aes\de \i0\().16b, \k\().16b - aes\mc \i0\().16b, \i0\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - aes\mc \i1\().16b, \i1\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\mc \i2\().16b, \i2\().16b - aes\de \i3\().16b, \k\().16b - aes\mc \i3\().16b, \i3\().16b - .ifnb \i4 - aes\de \i4\().16b, \k\().16b - aes\mc \i4\().16b, \i4\().16b - .endif - .endif - .endif - .endm - - /* up to 5 interleaved encryption rounds with the same round key */ - .macro round_Nx, enc, k, i0, i1, i2, i3, i4 - .ifc \enc, e - do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4 - .else - do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4 - .endif - .endm - - /* up to 5 interleaved final rounds */ - .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4 - aes\de \i0\().16b, \k\().16b - .ifnb \i1 - aes\de \i1\().16b, \k\().16b - .ifnb \i3 - aes\de \i2\().16b, \k\().16b - aes\de \i3\().16b, \k\().16b - .ifnb \i4 - aes\de \i4\().16b, \k\().16b - .endif - .endif - .endif - eor \i0\().16b, \i0\().16b, \k2\().16b - .ifnb \i1 - eor \i1\().16b, \i1\().16b, \k2\().16b - .ifnb \i3 - eor \i2\().16b, \i2\().16b, \k2\().16b - eor \i3\().16b, \i3\().16b, \k2\().16b - .ifnb \i4 - eor \i4\().16b, \i4\().16b, \k2\().16b - .endif - .endif - .endif - .endm - - /* up to 5 interleaved blocks */ - .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 - round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 -1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 - round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 - round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 - .endr - fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 - .endm - - .macro encrypt_block, in, rounds, t0, t1, t2 - do_block_Nx e, \rounds, \in - .endm - - .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 - do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 - .endm - - .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 - 
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4 - .endm - - .macro decrypt_block, in, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \in - .endm - - .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 - .endm - - .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4 - .endm - -#define MAX_STRIDE 5 - -#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S deleted file mode 100644 index 423d0aebc570f0350b8a6055b769161c559f2afc..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-cipher-core.S +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Scalar AES core transform - * - * Copyright (C) 2017 Linaro Ltd - */ - -#include -#include -#include - - .text - - rk .req x0 - out .req x1 - in .req x2 - rounds .req x3 - tt .req x2 - - .macro __pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift - .ifc \op\shift, b0 - ubfiz \reg0, \in0, #2, #8 - ubfiz \reg1, \in1e, #2, #8 - .else - ubfx \reg0, \in0, #\shift, #8 - ubfx \reg1, \in1e, #\shift, #8 - .endif - - /* - * AArch64 cannot do byte size indexed loads from a table containing - * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a - * valid instruction. So perform the shift explicitly first for the - * high bytes (the low byte is shifted implicitly by using ubfiz rather - * than ubfx above) - */ - .ifnc \op, b - ldr \reg0, [tt, \reg0, uxtw #2] - ldr \reg1, [tt, \reg1, uxtw #2] - .else - .if \shift > 0 - lsl \reg0, \reg0, #2 - lsl \reg1, \reg1, #2 - .endif - ldrb \reg0, [tt, \reg0, uxtw] - ldrb \reg1, [tt, \reg1, uxtw] - .endif - .endm - - .macro __pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift - ubfx \reg0, \in0, #\shift, #8 - ubfx \reg1, \in1d, #\shift, #8 - ldr\op \reg0, [tt, \reg0, uxtw #\sz] - ldr\op \reg1, [tt, \reg1, uxtw #\sz] - .endm - - .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op - ldp \out0, \out1, [rk], #8 - - __pair\enc \sz, \op, w12, w13, \in0, \in1, \in3, 0 - __pair\enc \sz, \op, w14, w15, \in1, \in2, \in0, 8 - __pair\enc \sz, \op, w16, w17, \in2, \in3, \in1, 16 - __pair\enc \sz, \op, \t0, \t1, \in3, \in0, \in2, 24 - - eor \out0, \out0, w12 - eor \out1, \out1, w13 - eor \out0, \out0, w14, ror #24 - eor \out1, \out1, w15, ror #24 - eor \out0, \out0, w16, ror #16 - eor \out1, \out1, w17, ror #16 - eor \out0, \out0, \t0, ror #8 - eor \out1, \out1, \t1, ror #8 - .endm - - .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op - __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op - __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op - .endm - - .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op - __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op - __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op - .endm - - .macro do_crypt, round, ttab, ltab, bsz - ldp w4, w5, [in] - ldp w6, w7, [in, #8] - ldp w8, w9, [rk], #16 - ldp w10, w11, [rk, #-8] - -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - - eor w4, w4, w8 - eor w5, w5, w9 - eor w6, w6, w10 - eor w7, w7, w11 - - adr_l tt, \ttab - - tbnz rounds, #1, 1f - -0: \round w8, w9, w10, w11, w4, w5, w6, w7 - \round w4, w5, w6, w7, w8, w9, w10, w11 - -1: subs rounds, rounds, #4 - \round w8, w9, w10, w11, w4, w5, w6, w7 - b.ls 3f -2: \round w4, w5, w6, w7, w8, w9, w10, w11 - b 0b -3: adr_l tt, \ltab - 
\round w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b - -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) - - stp w4, w5, [out] - stp w6, w7, [out, #8] - ret - .endm - -ENTRY(__aes_arm64_encrypt) - do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2 -ENDPROC(__aes_arm64_encrypt) - - .align 5 -ENTRY(__aes_arm64_decrypt) - do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 -ENDPROC(__aes_arm64_decrypt) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S deleted file mode 100644 index 131618389f1fda7fd3744ce0584a33ce5cc20388..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-modes.S +++ /dev/null @@ -1,679 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES - * - * Copyright (C) 2013 - 2017 Linaro Ltd - */ - -/* included by aes-ce.S and aes-neon.S */ - - .text - .align 4 - -#ifndef MAX_STRIDE -#define MAX_STRIDE 4 -#endif - -#if MAX_STRIDE == 4 -#define ST4(x...) x -#define ST5(x...) -#else -#define ST4(x...) -#define ST5(x...) x -#endif - -aes_encrypt_block4x: - encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - ret -ENDPROC(aes_encrypt_block4x) - -aes_decrypt_block4x: - decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 - ret -ENDPROC(aes_decrypt_block4x) - -#if MAX_STRIDE == 5 -aes_encrypt_block5x: - encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 - ret -ENDPROC(aes_encrypt_block5x) - -aes_decrypt_block5x: - decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 - ret -ENDPROC(aes_decrypt_block5x) -#endif - - /* - * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - -AES_ENTRY(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - enc_prepare w3, x2, x5 - -.LecbencloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lecbenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ -ST4( bl aes_encrypt_block4x ) -ST5( ld1 {v4.16b}, [x1], #16 ) -ST5( bl aes_encrypt_block5x ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LecbencloopNx -.Lecbenc1x: - adds w4, w4, #MAX_STRIDE - beq .Lecbencout -.Lecbencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - encrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lecbencloop -.Lecbencout: - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_ecb_encrypt) - - -AES_ENTRY(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! 
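The Nx/1x structure of these ECB loops is easier to follow in C; a sketch with a stand-in block helper (aes_enc_blocks is a placeholder for the encrypt_block/encrypt_block4x/encrypt_block5x macros):

    /* Sketch of the strided ECB loop structure. */
    #define AES_BLOCK 16

    extern void aes_enc_blocks(unsigned char *dst, const unsigned char *src,
                               int nblocks);    /* 1, 4 or 5 at a time */

    static void ecb_encrypt_model(unsigned char *out, const unsigned char *in,
                                  int blocks, int stride /* MAX_STRIDE */)
    {
        while (blocks >= stride) {              /* .LecbencloopNx */
            aes_enc_blocks(out, in, stride);
            in += stride * AES_BLOCK;
            out += stride * AES_BLOCK;
            blocks -= stride;
        }
        while (blocks-- > 0) {                  /* .Lecbencloop tail */
            aes_enc_blocks(out, in, 1);
            in += AES_BLOCK;
            out += AES_BLOCK;
        }
    }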
- mov x29, sp - - dec_prepare w3, x2, x5 - -.LecbdecloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lecbdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ -ST4( bl aes_decrypt_block4x ) -ST5( ld1 {v4.16b}, [x1], #16 ) -ST5( bl aes_decrypt_block5x ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LecbdecloopNx -.Lecbdec1x: - adds w4, w4, #MAX_STRIDE - beq .Lecbdecout -.Lecbdecloop: - ld1 {v0.16b}, [x1], #16 /* get next ct block */ - decrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lecbdecloop -.Lecbdecout: - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_ecb_decrypt) - - - /* - * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], - * int rounds, int blocks, u8 iv[], - * u32 const rk2[]); - * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], - * int rounds, int blocks, u8 iv[], - * u32 const rk2[]); - */ - -AES_ENTRY(aes_essiv_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - - mov w8, #14 /* AES-256: 14 rounds */ - enc_prepare w8, x6, x7 - encrypt_block v4, w8, x6, x7, w9 - enc_switch_key w3, x2, x6 - b .Lcbcencloop4x - -AES_ENTRY(aes_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 - -.Lcbcencloop4x: - subs w4, w4, #4 - bmi .Lcbcenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x6, w7 - eor v1.16b, v1.16b, v0.16b - encrypt_block v1, w3, x2, x6, w7 - eor v2.16b, v2.16b, v1.16b - encrypt_block v2, w3, x2, x6, w7 - eor v3.16b, v3.16b, v2.16b - encrypt_block v3, w3, x2, x6, w7 - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v3.16b - b .Lcbcencloop4x -.Lcbcenc1x: - adds w4, w4, #4 - beq .Lcbcencout -.Lcbcencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ - encrypt_block v4, w3, x2, x6, w7 - st1 {v4.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lcbcencloop -.Lcbcencout: - st1 {v4.16b}, [x5] /* return iv */ - ret -AES_ENDPROC(aes_cbc_encrypt) -AES_ENDPROC(aes_essiv_cbc_encrypt) - -AES_ENTRY(aes_essiv_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - ld1 {cbciv.16b}, [x5] /* get iv */ - - mov w8, #14 /* AES-256: 14 rounds */ - enc_prepare w8, x6, x7 - encrypt_block cbciv, w8, x6, x7, w9 - b .Lessivcbcdecstart - -AES_ENTRY(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! 
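CBC encryption above is inherently serial: each ciphertext block becomes the IV of the next, so the four-at-a-time load in .Lcbcencloop4x only batches the memory traffic, not the cipher invocations. The dataflow, as a C sketch with a hypothetical in-place single-block primitive:

    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void cbc_encrypt_sketch(uint8_t *out, const uint8_t *in,
                            int blocks, uint8_t iv[16])
    {
        while (blocks-- > 0) {
            for (int i = 0; i < 16; i++)
                iv[i] ^= in[i];          /* xor plaintext into the chain */
            aes_enc_block(iv);           /* iv now holds this ct block */
            memcpy(out, iv, 16);         /* ...and is the next block's iv */
            in  += 16;
            out += 16;
        }
    }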
- mov x29, sp - - ld1 {cbciv.16b}, [x5] /* get iv */ -.Lessivcbcdecstart: - dec_prepare w3, x2, x6 - -.LcbcdecloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lcbcdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ -#if MAX_STRIDE == 5 - ld1 {v4.16b}, [x1], #16 /* get 1 ct block */ - mov v5.16b, v0.16b - mov v6.16b, v1.16b - mov v7.16b, v2.16b - bl aes_decrypt_block5x - sub x1, x1, #32 - eor v0.16b, v0.16b, cbciv.16b - eor v1.16b, v1.16b, v5.16b - ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */ - ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ - eor v2.16b, v2.16b, v6.16b - eor v3.16b, v3.16b, v7.16b - eor v4.16b, v4.16b, v5.16b -#else - mov v4.16b, v0.16b - mov v5.16b, v1.16b - mov v6.16b, v2.16b - bl aes_decrypt_block4x - sub x1, x1, #16 - eor v0.16b, v0.16b, cbciv.16b - eor v1.16b, v1.16b, v4.16b - ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ - eor v2.16b, v2.16b, v5.16b - eor v3.16b, v3.16b, v6.16b -#endif - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - b .LcbcdecloopNx -.Lcbcdec1x: - adds w4, w4, #MAX_STRIDE - beq .Lcbcdecout -.Lcbcdecloop: - ld1 {v1.16b}, [x1], #16 /* get next ct block */ - mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x6, w7 - eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */ - mov cbciv.16b, v1.16b /* ct is next iv */ - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 - bne .Lcbcdecloop -.Lcbcdecout: - st1 {cbciv.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 - ret -AES_ENDPROC(aes_cbc_decrypt) -AES_ENDPROC(aes_essiv_cbc_decrypt) - - - /* - * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], - * int rounds, int bytes, u8 const iv[]) - */ - -AES_ENTRY(aes_cbc_cts_encrypt) - adr_l x8, .Lcts_permute_table - sub x4, x4, #16 - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - ld1 {v3.16b}, [x8] - ld1 {v4.16b}, [x9] - - ld1 {v0.16b}, [x1], x4 /* overlapping loads */ - ld1 {v1.16b}, [x1] - - ld1 {v5.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 - - eor v0.16b, v0.16b, v5.16b /* xor with iv */ - tbl v1.16b, {v1.16b}, v4.16b - encrypt_block v0, w3, x2, x6, w7 - - eor v1.16b, v1.16b, v0.16b - tbl v0.16b, {v0.16b}, v3.16b - encrypt_block v1, w3, x2, x6, w7 - - add x4, x0, x4 - st1 {v0.16b}, [x4] /* overlapping stores */ - st1 {v1.16b}, [x0] - ret -AES_ENDPROC(aes_cbc_cts_encrypt) - -AES_ENTRY(aes_cbc_cts_decrypt) - adr_l x8, .Lcts_permute_table - sub x4, x4, #16 - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - ld1 {v3.16b}, [x8] - ld1 {v4.16b}, [x9] - - ld1 {v0.16b}, [x1], x4 /* overlapping loads */ - ld1 {v1.16b}, [x1] - - ld1 {v5.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x6 - - decrypt_block v0, w3, x2, x6, w7 - tbl v2.16b, {v0.16b}, v3.16b - eor v2.16b, v2.16b, v1.16b - - tbx v0.16b, {v1.16b}, v4.16b - decrypt_block v0, w3, x2, x6, w7 - eor v0.16b, v0.16b, v5.16b /* xor with iv */ - - add x4, x0, x4 - st1 {v2.16b}, [x4] /* overlapping stores */ - st1 {v0.16b}, [x0] - ret -AES_ENDPROC(aes_cbc_cts_decrypt) - - .section ".rodata", "a" - .align 6 -.Lcts_permute_table: - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .previous - - - /* - * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 
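The cts routines above do ciphertext stealing without byte loops by combining overlapping loads/stores with tbl/tbx through .Lcts_permute_table. What aes_cbc_cts_encrypt computes on the final two blocks (16 + tail bytes, tail in 1..16), as a C sketch using the same hypothetical in-place primitive as before:

    #include <stdint.h>
    #include <string.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void cbc_cts_encrypt_tail(uint8_t *out, const uint8_t *in, int bytes,
                              const uint8_t iv[16])
    {
        uint8_t prev[16], last[16] = { 0 };
        int tail = bytes - 16;           /* bytes is 17..32 here */

        for (int i = 0; i < 16; i++)
            prev[i] = in[i] ^ iv[i];
        aes_enc_block(prev);             /* full next-to-last ct block */

        memcpy(last, in + 16, tail);     /* short final pt, zero padded */
        for (int i = 0; i < 16; i++)
            last[i] ^= prev[i];
        aes_enc_block(last);

        memcpy(out, last, 16);           /* the two blocks swap places */
        memcpy(out + 16, prev, tail);    /* truncated next-to-last block */
    }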
ctr[]) - */ - -AES_ENTRY(aes_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - enc_prepare w3, x2, x6 - ld1 {vctr.16b}, [x5] - - umov x6, vctr.d[1] /* keep swabbed ctr in reg */ - rev x6, x6 - cmn w6, w4 /* 32 bit overflow? */ - bcs .Lctrloop -.LctrloopNx: - subs w4, w4, #MAX_STRIDE - bmi .Lctr1x - add w7, w6, #1 - mov v0.16b, vctr.16b - add w8, w6, #2 - mov v1.16b, vctr.16b - add w9, w6, #3 - mov v2.16b, vctr.16b - add w9, w6, #3 - rev w7, w7 - mov v3.16b, vctr.16b - rev w8, w8 -ST5( mov v4.16b, vctr.16b ) - mov v1.s[3], w7 - rev w9, w9 -ST5( add w10, w6, #4 ) - mov v2.s[3], w8 -ST5( rev w10, w10 ) - mov v3.s[3], w9 -ST5( mov v4.s[3], w10 ) - ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ -ST4( bl aes_encrypt_block4x ) -ST5( bl aes_encrypt_block5x ) - eor v0.16b, v5.16b, v0.16b -ST4( ld1 {v5.16b}, [x1], #16 ) - eor v1.16b, v6.16b, v1.16b -ST5( ld1 {v5.16b-v6.16b}, [x1], #32 ) - eor v2.16b, v7.16b, v2.16b - eor v3.16b, v5.16b, v3.16b -ST5( eor v4.16b, v6.16b, v4.16b ) - st1 {v0.16b-v3.16b}, [x0], #64 -ST5( st1 {v4.16b}, [x0], #16 ) - add x6, x6, #MAX_STRIDE - rev x7, x6 - ins vctr.d[1], x7 - cbz w4, .Lctrout - b .LctrloopNx -.Lctr1x: - adds w4, w4, #MAX_STRIDE - beq .Lctrout -.Lctrloop: - mov v0.16b, vctr.16b - encrypt_block v0, w3, x2, x8, w7 - - adds x6, x6, #1 /* increment BE ctr */ - rev x7, x6 - ins vctr.d[1], x7 - bcs .Lctrcarry /* overflow? */ - -.Lctrcarrydone: - subs w4, w4, #1 - bmi .Lctrtailblock /* blocks <0 means tail block */ - ld1 {v3.16b}, [x1], #16 - eor v3.16b, v0.16b, v3.16b - st1 {v3.16b}, [x0], #16 - bne .Lctrloop - -.Lctrout: - st1 {vctr.16b}, [x5] /* return next CTR value */ - ldp x29, x30, [sp], #16 - ret - -.Lctrtailblock: - st1 {v0.16b}, [x0] - b .Lctrout - -.Lctrcarry: - umov x7, vctr.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins vctr.d[0], x7 - b .Lctrcarrydone -AES_ENDPROC(aes_ctr_encrypt) - - - /* - * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int bytes, u8 const rk2[], u8 iv[], int first) - * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, - * int bytes, u8 const rk2[], u8 iv[], int first) - */ - - .macro next_tweak, out, in, tmp - sshr \tmp\().2d, \in\().2d, #63 - and \tmp\().16b, \tmp\().16b, xtsmask.16b - add \out\().2d, \in\().2d, \in\().2d - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \out\().16b, \out\().16b, \tmp\().16b - .endm - - .macro xts_load_mask, tmp - movi xtsmask.2s, #0x1 - movi \tmp\().2s, #0x87 - uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s - .endm - -AES_ENTRY(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! 
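The CTR path above keeps the low 64 bits of the big-endian counter byte-swapped in x6, so the common-case increment is a plain add and only the .Lctrcarry path touches the high half. The same bookkeeping in C, assuming a little-endian host and the GCC/Clang bswap builtin:

    #include <stdint.h>

    /* ctr[0] is the high half, ctr[1] the low half, stored big endian
     * as in the vctr register. */
    static void ctr128_inc(uint64_t ctr[2])
    {
        uint64_t lo = __builtin_bswap64(ctr[1]) + 1;

        ctr[1] = __builtin_bswap64(lo);
        if (lo == 0)   /* carry out of the low 64 bits */
            ctr[0] = __builtin_bswap64(__builtin_bswap64(ctr[0]) + 1);
    }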
- mov x29, sp - - ld1 {v4.16b}, [x6] - xts_load_mask v8 - cbz w7, .Lxtsencnotfirst - - enc_prepare w3, x5, x8 - xts_cts_skip_tw w7, .LxtsencNx - encrypt_block v4, w3, x5, x8, w7 /* first tweak */ - enc_switch_key w3, x2, x8 - b .LxtsencNx - -.Lxtsencnotfirst: - enc_prepare w3, x2, x8 -.LxtsencloopNx: - next_tweak v4, v4, v8 -.LxtsencNx: - subs w4, w4, #64 - bmi .Lxtsenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - next_tweak v5, v4, v8 - eor v0.16b, v0.16b, v4.16b - next_tweak v6, v5, v8 - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - next_tweak v7, v6, v8 - eor v3.16b, v3.16b, v7.16b - bl aes_encrypt_block4x - eor v3.16b, v3.16b, v7.16b - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v7.16b - cbz w4, .Lxtsencret - xts_reload_mask v8 - b .LxtsencloopNx -.Lxtsenc1x: - adds w4, w4, #64 - beq .Lxtsencout - subs w4, w4, #16 - bmi .LxtsencctsNx -.Lxtsencloop: - ld1 {v0.16b}, [x1], #16 -.Lxtsencctsout: - eor v0.16b, v0.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v4.16b - cbz w4, .Lxtsencout - subs w4, w4, #16 - next_tweak v4, v4, v8 - bmi .Lxtsenccts - st1 {v0.16b}, [x0], #16 - b .Lxtsencloop -.Lxtsencout: - st1 {v0.16b}, [x0] -.Lxtsencret: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 - ret - -.LxtsencctsNx: - mov v0.16b, v3.16b - sub x0, x0, #16 -.Lxtsenccts: - adr_l x8, .Lcts_permute_table - - add x1, x1, w4, sxtw /* rewind input pointer */ - add w4, w4, #16 /* # bytes in final block */ - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - add x4, x0, x4 /* output address of final block */ - - ld1 {v1.16b}, [x1] /* load final block */ - ld1 {v2.16b}, [x8] - ld1 {v3.16b}, [x9] - - tbl v2.16b, {v0.16b}, v2.16b - tbx v0.16b, {v1.16b}, v3.16b - st1 {v2.16b}, [x4] /* overlapping stores */ - mov w4, wzr - b .Lxtsencctsout -AES_ENDPROC(aes_xts_encrypt) - -AES_ENTRY(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! 
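The next_tweak macro above is multiplication by x in GF(2^128) under the XTS polynomial x^128 + x^7 + x^2 + x + 1: shift the 128-bit tweak left one bit and xor 0x87 back in when the top bit falls out. The sshr/and/ext/add/eor sequence computes exactly this without leaving the vector unit; in scalar C on a little-endian tweak:

    #include <stdint.h>

    static void xts_next_tweak(uint64_t t[2])   /* t[0] = low, t[1] = high */
    {
        uint64_t carry_lo = t[0] >> 63;         /* bit 63 -> bit 64 */
        uint64_t carry_hi = t[1] >> 63;         /* bit 127 -> reduction */

        t[1] = (t[1] << 1) | carry_lo;
        t[0] = (t[0] << 1) ^ (carry_hi ? 0x87 : 0);
    }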
- mov x29, sp - - /* subtract 16 bytes if we are doing CTS */ - sub w8, w4, #0x10 - tst w4, #0xf - csel w4, w4, w8, eq - - ld1 {v4.16b}, [x6] - xts_load_mask v8 - xts_cts_skip_tw w7, .Lxtsdecskiptw - cbz w7, .Lxtsdecnotfirst - - enc_prepare w3, x5, x8 - encrypt_block v4, w3, x5, x8, w7 /* first tweak */ -.Lxtsdecskiptw: - dec_prepare w3, x2, x8 - b .LxtsdecNx - -.Lxtsdecnotfirst: - dec_prepare w3, x2, x8 -.LxtsdecloopNx: - next_tweak v4, v4, v8 -.LxtsdecNx: - subs w4, w4, #64 - bmi .Lxtsdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ - next_tweak v5, v4, v8 - eor v0.16b, v0.16b, v4.16b - next_tweak v6, v5, v8 - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - next_tweak v7, v6, v8 - eor v3.16b, v3.16b, v7.16b - bl aes_decrypt_block4x - eor v3.16b, v3.16b, v7.16b - eor v0.16b, v0.16b, v4.16b - eor v1.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 - mov v4.16b, v7.16b - cbz w4, .Lxtsdecout - xts_reload_mask v8 - b .LxtsdecloopNx -.Lxtsdec1x: - adds w4, w4, #64 - beq .Lxtsdecout - subs w4, w4, #16 -.Lxtsdecloop: - ld1 {v0.16b}, [x1], #16 - bmi .Lxtsdeccts -.Lxtsdecctsout: - eor v0.16b, v0.16b, v4.16b - decrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - cbz w4, .Lxtsdecout - subs w4, w4, #16 - next_tweak v4, v4, v8 - b .Lxtsdecloop -.Lxtsdecout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 - ret - -.Lxtsdeccts: - adr_l x8, .Lcts_permute_table - - add x1, x1, w4, sxtw /* rewind input pointer */ - add w4, w4, #16 /* # bytes in final block */ - add x9, x8, #32 - add x8, x8, x4 - sub x9, x9, x4 - add x4, x0, x4 /* output address of final block */ - - next_tweak v5, v4, v8 - - ld1 {v1.16b}, [x1] /* load final block */ - ld1 {v2.16b}, [x8] - ld1 {v3.16b}, [x9] - - eor v0.16b, v0.16b, v5.16b - decrypt_block v0, w3, x2, x8, w7 - eor v0.16b, v0.16b, v5.16b - - tbl v2.16b, {v0.16b}, v2.16b - tbx v0.16b, {v1.16b}, v3.16b - - st1 {v2.16b}, [x4] /* overlapping stores */ - mov w4, wzr - b .Lxtsdecctsout -AES_ENDPROC(aes_xts_decrypt) - - /* - * aes_mac_update(u8 const in[], u32 const rk[], int rounds, - * int blocks, u8 dg[], int enc_before, int enc_after) - */ -AES_ENTRY(aes_mac_update) - frame_push 6 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x6 - - ld1 {v0.16b}, [x23] /* get dg */ - enc_prepare w2, x1, x7 - cbz w5, .Lmacloop4x - - encrypt_block v0, w2, x1, x7, w8 - -.Lmacloop4x: - subs w22, w22, #4 - bmi .Lmac1x - ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ - eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v2.16b - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v3.16b - encrypt_block v0, w21, x20, x7, w8 - eor v0.16b, v0.16b, v4.16b - cmp w22, wzr - csinv x5, x24, xzr, eq - cbz w5, .Lmacout - encrypt_block v0, w21, x20, x7, w8 - st1 {v0.16b}, [x23] /* return dg */ - cond_yield_neon .Lmacrestart - b .Lmacloop4x -.Lmac1x: - add w22, w22, #4 -.Lmacloop: - cbz w22, .Lmacout - ld1 {v1.16b}, [x19], #16 /* get next pt block */ - eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - - subs w22, w22, #1 - csinv x5, x24, xzr, eq - cbz w5, .Lmacout - -.Lmacenc: - encrypt_block v0, w21, x20, x7, w8 - b .Lmacloop - -.Lmacout: - st1 {v0.16b}, [x23] /* return dg */ - frame_pop - ret - -.Lmacrestart: - ld1 {v0.16b}, [x23] /* get dg */ - enc_prepare w21, x20, x0 - b .Lmacloop4x -AES_ENDPROC(aes_mac_update) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S deleted file mode 100644 
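aes_mac_update above is a CBC-MAC core for the MAC glue code: xor each message block into the running digest and encrypt, with the enc_before/enc_after flags letting the caller defer the final encryption when a message arrives in several calls. In C, with the same hypothetical in-place block primitive:

    #include <stdint.h>

    void aes_enc_block(uint8_t b[16]);   /* hypothetical, encrypts in place */

    void mac_update_sketch(const uint8_t *in, int blocks, uint8_t dg[16],
                           int enc_before, int enc_after)
    {
        if (enc_before)
            aes_enc_block(dg);

        while (blocks-- > 0) {
            for (int i = 0; i < 16; i++)
                dg[i] ^= in[i];
            if (blocks > 0 || enc_after)   /* last block only on request */
                aes_enc_block(dg);
            in += 16;
        }
    }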
index 22d9b110cf78b1838ea1ef5c7643a9817e737756..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-neon.S +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON - * - * Copyright (C) 2013 - 2017 Linaro Ltd. - */ - -#include -#include - -#define AES_ENTRY(func) ENTRY(neon_ ## func) -#define AES_ENDPROC(func) ENDPROC(neon_ ## func) - - xtsmask .req v7 - cbciv .req v7 - vctr .req v4 - - .macro xts_reload_mask, tmp - xts_load_mask \tmp - .endm - - /* special case for the neon-bs driver calling into this one for CTS */ - .macro xts_cts_skip_tw, reg, lbl - tbnz \reg, #1, \lbl - .endm - - /* multiply by polynomial 'x' in GF(2^8) */ - .macro mul_by_x, out, in, temp, const - sshr \temp, \in, #7 - shl \out, \in, #1 - and \temp, \temp, \const - eor \out, \out, \temp - .endm - - /* multiply by polynomial 'x^2' in GF(2^8) */ - .macro mul_by_x2, out, in, temp, const - ushr \temp, \in, #6 - shl \out, \in, #2 - pmul \temp, \temp, \const - eor \out, \out, \temp - .endm - - /* preload the entire Sbox */ - .macro prepare, sbox, shiftrows, temp - movi v12.16b, #0x1b - ldr_l q13, \shiftrows, \temp - ldr_l q14, .Lror32by8, \temp - adr_l \temp, \sbox - ld1 {v16.16b-v19.16b}, [\temp], #64 - ld1 {v20.16b-v23.16b}, [\temp], #64 - ld1 {v24.16b-v27.16b}, [\temp], #64 - ld1 {v28.16b-v31.16b}, [\temp] - .endm - - /* do preload for encryption */ - .macro enc_prepare, ignore0, ignore1, temp - prepare crypto_aes_sbox, .LForward_ShiftRows, \temp - .endm - - .macro enc_switch_key, ignore0, ignore1, temp - /* do nothing */ - .endm - - /* do preload for decryption */ - .macro dec_prepare, ignore0, ignore1, temp - prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp - .endm - - /* apply SubBytes transformation using the the preloaded Sbox */ - .macro sub_bytes, in - sub v9.16b, \in\().16b, v15.16b - tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b - sub v10.16b, v9.16b, v15.16b - tbx \in\().16b, {v20.16b-v23.16b}, v9.16b - sub v11.16b, v10.16b, v15.16b - tbx \in\().16b, {v24.16b-v27.16b}, v10.16b - tbx \in\().16b, {v28.16b-v31.16b}, v11.16b - .endm - - /* apply MixColumns transformation */ - .macro mix_columns, in, enc - .if \enc == 0 - /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b - eor \in\().16b, \in\().16b, v8.16b - rev32 v8.8h, v8.8h - eor \in\().16b, \in\().16b, v8.16b - .endif - - mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b - rev32 v8.8h, \in\().8h - eor v8.16b, v8.16b, v9.16b - eor \in\().16b, \in\().16b, v8.16b - tbl \in\().16b, {\in\().16b}, v14.16b - eor \in\().16b, \in\().16b, v8.16b - .endm - - .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.4s}, [\rk] - add \rkp, \rk, #16 - mov \i, \rounds -1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ - movi v15.16b, #0x40 - tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ - sub_bytes \in - subs \i, \i, #1 - ld1 {v15.4s}, [\rkp], #16 - beq 2222f - mix_columns \in, \enc - b 1111b -2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ - .endm - - .macro encrypt_block, in, rounds, rk, rkp, i - do_block 1, \in, \rounds, \rk, \rkp, \i - .endm - - .macro decrypt_block, in, rounds, rk, rkp, i - do_block 0, \in, \rounds, \rk, \rkp, \i - .endm - - /* - * Interleaved versions: functionally equivalent to the - * ones above, but applied to AES states in parallel. 
- */ - - .macro sub_bytes_4x, in0, in1, in2, in3 - sub v8.16b, \in0\().16b, v15.16b - tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b - sub v9.16b, \in1\().16b, v15.16b - tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, \in2\().16b, v15.16b - tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b - sub v11.16b, \in3\().16b, v15.16b - tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b - tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b - tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v8.16b, v15.16b - tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b - sub v9.16b, v9.16b, v15.16b - tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b - sub v10.16b, v10.16b, v15.16b - tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b - sub v11.16b, v11.16b, v15.16b - tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b - sub v8.16b, v8.16b, v15.16b - tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b - sub v9.16b, v9.16b, v15.16b - tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b - sub v10.16b, v10.16b, v15.16b - tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b - sub v11.16b, v11.16b, v15.16b - tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b - tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b - tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b - .endm - - .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const - sshr \tmp0\().16b, \in0\().16b, #7 - shl \out0\().16b, \in0\().16b, #1 - sshr \tmp1\().16b, \in1\().16b, #7 - and \tmp0\().16b, \tmp0\().16b, \const\().16b - shl \out1\().16b, \in1\().16b, #1 - and \tmp1\().16b, \tmp1\().16b, \const\().16b - eor \out0\().16b, \out0\().16b, \tmp0\().16b - eor \out1\().16b, \out1\().16b, \tmp1\().16b - .endm - - .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const - ushr \tmp0\().16b, \in0\().16b, #6 - shl \out0\().16b, \in0\().16b, #2 - ushr \tmp1\().16b, \in1\().16b, #6 - pmul \tmp0\().16b, \tmp0\().16b, \const\().16b - shl \out1\().16b, \in1\().16b, #2 - pmul \tmp1\().16b, \tmp1\().16b, \const\().16b - eor \out0\().16b, \out0\().16b, \tmp0\().16b - eor \out1\().16b, \out1\().16b, \tmp1\().16b - .endm - - .macro mix_columns_2x, in0, in1, enc - .if \enc == 0 - /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ - mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 - eor \in0\().16b, \in0\().16b, v8.16b - rev32 v8.8h, v8.8h - eor \in1\().16b, \in1\().16b, v9.16b - rev32 v9.8h, v9.8h - eor \in0\().16b, \in0\().16b, v8.16b - eor \in1\().16b, \in1\().16b, v9.16b - .endif - - mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 - rev32 v10.8h, \in0\().8h - rev32 v11.8h, \in1\().8h - eor v10.16b, v10.16b, v8.16b - eor v11.16b, v11.16b, v9.16b - eor \in0\().16b, \in0\().16b, v10.16b - eor \in1\().16b, \in1\().16b, v11.16b - tbl \in0\().16b, {\in0\().16b}, v14.16b - tbl \in1\().16b, {\in1\().16b}, v14.16b - eor \in0\().16b, \in0\().16b, v10.16b - eor \in1\().16b, \in1\().16b, v11.16b - .endm - - .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.4s}, [\rk] - add \rkp, \rk, #16 - mov \i, \rounds -1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ - eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - movi v15.16b, #0x40 - tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ - tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ - tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - sub_bytes_4x \in0, \in1, \in2, \in3 - subs \i, \i, #1 - ld1 {v15.4s}, [\rkp], #16 - beq 2222f - mix_columns_2x \in0, \in1, \enc - 
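The mul_by_x/mul_by_x2 macros above (and their _2x variants) implement multiplication by x and x^2 in GF(2^8) modulo the AES polynomial x^8 + x^4 + x^3 + x + 1; 0x1b is the reduction constant kept in v12, and pmul handles the two-bit case as a carryless multiply. Scalar C equivalents:

    #include <stdint.h>

    static uint8_t mul_by_x(uint8_t b)
    {
        return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1b));
    }

    static uint8_t mul_by_x2(uint8_t b)
    {
        uint8_t hi = b >> 6;   /* the two bits shifted out of the byte */

        /* carryless multiply of hi by 0x1b, as the pmul above does */
        return (uint8_t)((b << 2) ^ ((hi & 1) * 0x1b) ^ ((hi >> 1) * 0x36));
    }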
mix_columns_2x \in2, \in3, \enc - b 1111b -2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ - eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ - .endm - - .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i - do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i - .endm - - .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i - do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i - .endm - -#include "aes-modes.S" - - .section ".rodata", "a" - .align 4 -.LForward_ShiftRows: - .octa 0x0b06010c07020d08030e09040f0a0500 - -.LReverse_ShiftRows: - .octa 0x0306090c0f0205080b0e0104070a0d00 - -.Lror32by8: - .octa 0x0c0f0e0d080b0a090407060500030201 diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S deleted file mode 100644 index 65982039fa3653faf9d9dc369c8b163b5ae95cc1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ /dev/null @@ -1,1005 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Bit sliced AES using NEON instructions - * - * Copyright (C) 2016 Linaro Ltd - */ - -/* - * The algorithm implemented here is described in detail by the paper - * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and - * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) - * - * This implementation is based primarily on the OpenSSL implementation - * for 32-bit ARM written by Andy Polyakov - */ - -#include -#include - - .text - - rounds .req x11 - bskey .req x12 - - .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - eor \b2, \b2, \b1 - eor \b5, \b5, \b6 - eor \b3, \b3, \b0 - eor \b6, \b6, \b2 - eor \b5, \b5, \b0 - eor \b6, \b6, \b3 - eor \b3, \b3, \b7 - eor \b7, \b7, \b5 - eor \b3, \b3, \b4 - eor \b4, \b4, \b5 - eor \b2, \b2, \b7 - eor \b3, \b3, \b1 - eor \b1, \b1, \b5 - .endm - - .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 - eor \b0, \b0, \b6 - eor \b1, \b1, \b4 - eor \b4, \b4, \b6 - eor \b2, \b2, \b0 - eor \b6, \b6, \b1 - eor \b1, \b1, \b5 - eor \b5, \b5, \b3 - eor \b3, \b3, \b7 - eor \b7, \b7, \b5 - eor \b2, \b2, \b5 - eor \b4, \b4, \b7 - .endm - - .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 - eor \b1, \b1, \b7 - eor \b4, \b4, \b7 - eor \b7, \b7, \b5 - eor \b1, \b1, \b3 - eor \b2, \b2, \b5 - eor \b3, \b3, \b7 - eor \b6, \b6, \b1 - eor \b2, \b2, \b0 - eor \b5, \b5, \b3 - eor \b4, \b4, \b6 - eor \b0, \b0, \b6 - eor \b1, \b1, \b4 - .endm - - .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 - eor \b1, \b1, \b5 - eor \b2, \b2, \b7 - eor \b3, \b3, \b1 - eor \b4, \b4, \b5 - eor \b7, \b7, \b5 - eor \b3, \b3, \b4 - eor \b5, \b5, \b0 - eor \b3, \b3, \b7 - eor \b6, \b6, \b2 - eor \b2, \b2, \b1 - eor \b6, \b6, \b3 - eor \b3, \b3, \b0 - eor \b5, \b5, \b6 - .endm - - .macro mul_gf4, x0, x1, y0, y1, t0, t1 - eor \t0, \y0, \y1 - and \t0, \t0, \x0 - eor \x0, \x0, \x1 - and \t1, \x1, \y0 - and \x0, \x0, \y1 - eor \x1, \t1, \t0 - eor \x0, \x0, \t1 - .endm - - .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 - eor \t0, \y0, \y1 - eor \t1, \y2, \y3 - and \t0, \t0, \x0 - and \t1, \t1, \x2 - eor \x0, \x0, \x1 - eor \x2, \x2, \x3 - and \x1, \x1, \y0 - and \x3, \x3, \y2 - and \x0, \x0, \y1 - and \x2, \x2, \y3 - eor \x1, \x1, \x0 - eor \x2, \x2, \x3 - eor \x0, \x0, \t0 - eor \x3, \x3, \t1 - .endm - - .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ - y0, y1, y2, y3, t0, t1, t2, t3 - eor \t0, \x0, \x2 - eor \t1, \x1, \x3 - mul_gf4 \x0, \x1, 
\y0, \y1, \t2, \t3 - eor \y0, \y0, \y2 - eor \y1, \y1, \y3 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 - eor \x0, \x0, \t0 - eor \x2, \x2, \t0 - eor \x1, \x1, \t1 - eor \x3, \x3, \t1 - eor \t0, \x4, \x6 - eor \t1, \x5, \x7 - mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 - eor \y0, \y0, \y2 - eor \y1, \y1, \y3 - mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 - eor \x4, \x4, \t0 - eor \x6, \x6, \t0 - eor \x5, \x5, \t1 - eor \x7, \x7, \t1 - .endm - - .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - eor \t3, \x4, \x6 - eor \t0, \x5, \x7 - eor \t1, \x1, \x3 - eor \s1, \x7, \x6 - eor \s0, \x0, \x2 - eor \s3, \t3, \t0 - orr \t2, \t0, \t1 - and \s2, \t3, \s0 - orr \t3, \t3, \s0 - eor \s0, \s0, \t1 - and \t0, \t0, \t1 - eor \t1, \x3, \x2 - and \s3, \s3, \s0 - and \s1, \s1, \t1 - eor \t1, \x4, \x5 - eor \s0, \x1, \x0 - eor \t3, \t3, \s1 - eor \t2, \t2, \s1 - and \s1, \t1, \s0 - orr \t1, \t1, \s0 - eor \t3, \t3, \s3 - eor \t0, \t0, \s1 - eor \t2, \t2, \s2 - eor \t1, \t1, \s3 - eor \t0, \t0, \s2 - and \s0, \x7, \x3 - eor \t1, \t1, \s2 - and \s1, \x6, \x2 - and \s2, \x5, \x1 - orr \s3, \x4, \x0 - eor \t3, \t3, \s0 - eor \t1, \t1, \s2 - eor \s0, \t0, \s3 - eor \t2, \t2, \s1 - and \s2, \t3, \t1 - eor \s1, \t2, \s2 - eor \s3, \s0, \s2 - bsl \s1, \t1, \s0 - not \t0, \s0 - bsl \s0, \s1, \s3 - bsl \t0, \s1, \s3 - bsl \s3, \t3, \t2 - eor \t3, \t3, \t2 - and \s2, \s0, \s3 - eor \t1, \t1, \t0 - eor \s2, \s2, \t3 - mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 - .endm - - .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ - \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b - inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ - \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ - \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ - \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b - out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ - \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b - .endm - - .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ - t0, t1, t2, t3, s0, s1, s2, s3 - inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ - \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b - inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \ - \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ - \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ - \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b - inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ - \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b - .endm - - .macro enc_next_rk - ldp q16, q17, [bskey], #128 - ldp q18, q19, [bskey, #-96] - ldp q20, q21, [bskey, #-64] - ldp q22, q23, [bskey, #-32] - .endm - - .macro dec_next_rk - ldp q16, q17, [bskey, #-128]! 
- ldp q18, q19, [bskey, #32] - ldp q20, q21, [bskey, #64] - ldp q22, q23, [bskey, #96] - .endm - - .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 - eor \x0\().16b, \x0\().16b, v16.16b - eor \x1\().16b, \x1\().16b, v17.16b - eor \x2\().16b, \x2\().16b, v18.16b - eor \x3\().16b, \x3\().16b, v19.16b - eor \x4\().16b, \x4\().16b, v20.16b - eor \x5\().16b, \x5\().16b, v21.16b - eor \x6\().16b, \x6\().16b, v22.16b - eor \x7\().16b, \x7\().16b, v23.16b - .endm - - .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask - tbl \x0\().16b, {\x0\().16b}, \mask\().16b - tbl \x1\().16b, {\x1\().16b}, \mask\().16b - tbl \x2\().16b, {\x2\().16b}, \mask\().16b - tbl \x3\().16b, {\x3\().16b}, \mask\().16b - tbl \x4\().16b, {\x4\().16b}, \mask\().16b - tbl \x5\().16b, {\x5\().16b}, \mask\().16b - tbl \x6\().16b, {\x6\().16b}, \mask\().16b - tbl \x7\().16b, {\x7\().16b}, \mask\().16b - .endm - - .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7, inv - ext \t0\().16b, \x0\().16b, \x0\().16b, #12 - ext \t1\().16b, \x1\().16b, \x1\().16b, #12 - eor \x0\().16b, \x0\().16b, \t0\().16b - ext \t2\().16b, \x2\().16b, \x2\().16b, #12 - eor \x1\().16b, \x1\().16b, \t1\().16b - ext \t3\().16b, \x3\().16b, \x3\().16b, #12 - eor \x2\().16b, \x2\().16b, \t2\().16b - ext \t4\().16b, \x4\().16b, \x4\().16b, #12 - eor \x3\().16b, \x3\().16b, \t3\().16b - ext \t5\().16b, \x5\().16b, \x5\().16b, #12 - eor \x4\().16b, \x4\().16b, \t4\().16b - ext \t6\().16b, \x6\().16b, \x6\().16b, #12 - eor \x5\().16b, \x5\().16b, \t5\().16b - ext \t7\().16b, \x7\().16b, \x7\().16b, #12 - eor \x6\().16b, \x6\().16b, \t6\().16b - eor \t1\().16b, \t1\().16b, \x0\().16b - eor \x7\().16b, \x7\().16b, \t7\().16b - ext \x0\().16b, \x0\().16b, \x0\().16b, #8 - eor \t2\().16b, \t2\().16b, \x1\().16b - eor \t0\().16b, \t0\().16b, \x7\().16b - eor \t1\().16b, \t1\().16b, \x7\().16b - ext \x1\().16b, \x1\().16b, \x1\().16b, #8 - eor \t5\().16b, \t5\().16b, \x4\().16b - eor \x0\().16b, \x0\().16b, \t0\().16b - eor \t6\().16b, \t6\().16b, \x5\().16b - eor \x1\().16b, \x1\().16b, \t1\().16b - ext \t0\().16b, \x4\().16b, \x4\().16b, #8 - eor \t4\().16b, \t4\().16b, \x3\().16b - ext \t1\().16b, \x5\().16b, \x5\().16b, #8 - eor \t7\().16b, \t7\().16b, \x6\().16b - ext \x4\().16b, \x3\().16b, \x3\().16b, #8 - eor \t3\().16b, \t3\().16b, \x2\().16b - ext \x5\().16b, \x7\().16b, \x7\().16b, #8 - eor \t4\().16b, \t4\().16b, \x7\().16b - ext \x3\().16b, \x6\().16b, \x6\().16b, #8 - eor \t3\().16b, \t3\().16b, \x7\().16b - ext \x6\().16b, \x2\().16b, \x2\().16b, #8 - eor \x7\().16b, \t1\().16b, \t5\().16b - .ifb \inv - eor \x2\().16b, \t0\().16b, \t4\().16b - eor \x4\().16b, \x4\().16b, \t3\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x3\().16b, \x3\().16b, \t6\().16b - eor \x6\().16b, \x6\().16b, \t2\().16b - .else - eor \t3\().16b, \t3\().16b, \x4\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x2\().16b, \x3\().16b, \t6\().16b - eor \x3\().16b, \t0\().16b, \t4\().16b - eor \x4\().16b, \x6\().16b, \t2\().16b - mov \x6\().16b, \t3\().16b - .endif - .endm - - .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ - t0, t1, t2, t3, t4, t5, t6, t7 - ext \t0\().16b, \x0\().16b, \x0\().16b, #8 - ext \t6\().16b, \x6\().16b, \x6\().16b, #8 - ext \t7\().16b, \x7\().16b, \x7\().16b, #8 - eor \t0\().16b, \t0\().16b, \x0\().16b - ext \t1\().16b, \x1\().16b, \x1\().16b, #8 - eor \t6\().16b, \t6\().16b, \x6\().16b - ext \t2\().16b, \x2\().16b, \x2\().16b, #8 - eor \t7\().16b, \t7\().16b, \x7\().16b - ext \t3\().16b, 
\x3\().16b, \x3\().16b, #8 - eor \t1\().16b, \t1\().16b, \x1\().16b - ext \t4\().16b, \x4\().16b, \x4\().16b, #8 - eor \t2\().16b, \t2\().16b, \x2\().16b - ext \t5\().16b, \x5\().16b, \x5\().16b, #8 - eor \t3\().16b, \t3\().16b, \x3\().16b - eor \t4\().16b, \t4\().16b, \x4\().16b - eor \t5\().16b, \t5\().16b, \x5\().16b - eor \x0\().16b, \x0\().16b, \t6\().16b - eor \x1\().16b, \x1\().16b, \t6\().16b - eor \x2\().16b, \x2\().16b, \t0\().16b - eor \x4\().16b, \x4\().16b, \t2\().16b - eor \x3\().16b, \x3\().16b, \t1\().16b - eor \x1\().16b, \x1\().16b, \t7\().16b - eor \x2\().16b, \x2\().16b, \t7\().16b - eor \x4\().16b, \x4\().16b, \t6\().16b - eor \x5\().16b, \x5\().16b, \t3\().16b - eor \x3\().16b, \x3\().16b, \t6\().16b - eor \x6\().16b, \x6\().16b, \t4\().16b - eor \x4\().16b, \x4\().16b, \t7\().16b - eor \x5\().16b, \x5\().16b, \t7\().16b - eor \x7\().16b, \x7\().16b, \t5\().16b - mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ - \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 - .endm - - .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 - ushr \t0\().2d, \b0\().2d, #\n - ushr \t1\().2d, \b1\().2d, #\n - eor \t0\().16b, \t0\().16b, \a0\().16b - eor \t1\().16b, \t1\().16b, \a1\().16b - and \t0\().16b, \t0\().16b, \mask\().16b - and \t1\().16b, \t1\().16b, \mask\().16b - eor \a0\().16b, \a0\().16b, \t0\().16b - shl \t0\().2d, \t0\().2d, #\n - eor \a1\().16b, \a1\().16b, \t1\().16b - shl \t1\().2d, \t1\().2d, #\n - eor \b0\().16b, \b0\().16b, \t0\().16b - eor \b1\().16b, \b1\().16b, \t1\().16b - .endm - - .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 - movi \t0\().16b, #0x55 - movi \t1\().16b, #0x33 - swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 - swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 - movi \t0\().16b, #0x0f - swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 - swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 - swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 - swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3 - .endm - - - .align 6 -M0: .octa 0x0004080c0105090d02060a0e03070b0f - -M0SR: .octa 0x0004080c05090d010a0e02060f03070b -SR: .octa 0x0f0e0d0c0a09080b0504070600030201 -SRM0: .octa 0x01060b0c0207080d0304090e00050a0f - -M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 -ISR: .octa 0x0f0e0d0c080b0a090504070602010003 -ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f - - /* - * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) - */ -ENTRY(aesbs_convert_key) - ld1 {v7.4s}, [x1], #16 // load round 0 key - ld1 {v17.4s}, [x1], #16 // load round 1 key - - movi v8.16b, #0x01 // bit masks - movi v9.16b, #0x02 - movi v10.16b, #0x04 - movi v11.16b, #0x08 - movi v12.16b, #0x10 - movi v13.16b, #0x20 - movi v14.16b, #0x40 - movi v15.16b, #0x80 - ldr q16, M0 - - sub x2, x2, #1 - str q7, [x0], #16 // save round 0 key - -.Lkey_loop: - tbl v7.16b ,{v17.16b}, v16.16b - ld1 {v17.4s}, [x1], #16 // load next round key - - cmtst v0.16b, v7.16b, v8.16b - cmtst v1.16b, v7.16b, v9.16b - cmtst v2.16b, v7.16b, v10.16b - cmtst v3.16b, v7.16b, v11.16b - cmtst v4.16b, v7.16b, v12.16b - cmtst v5.16b, v7.16b, v13.16b - cmtst v6.16b, v7.16b, v14.16b - cmtst v7.16b, v7.16b, v15.16b - not v0.16b, v0.16b - not v1.16b, v1.16b - not v5.16b, v5.16b - not v6.16b, v6.16b - - subs x2, x2, #1 - stp q0, q1, [x0], #128 - stp q2, q3, [x0, #-96] - stp q4, q5, [x0, #-64] - stp q6, q7, [x0, #-32] - b.ne .Lkey_loop - - movi v7.16b, #0x63 // compose .L63 - eor v17.16b, v17.16b, v7.16b - str q17, [x0] - ret -ENDPROC(aesbs_convert_key) - - .align 4 -aesbs_encrypt8: - ldr q9, [bskey], #16 // round 0 key 
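The bitslice macro above is built from the classic swapmove step: exchange the bit groups selected by a mask between two registers, one of them at a bit offset n, using three xors and two shifts and no conditionals. Applied with the 0x55/0x33/0x0f masks it transposes eight AES states into the one-bit-plane-per-register form the bit-sliced sbox expects. A 64-bit scalar rendering:

    #include <stdint.h>

    static void swapmove(uint64_t *a, uint64_t *b, unsigned int n,
                         uint64_t mask)
    {
        uint64_t t = ((*b >> n) ^ *a) & mask;

        *a ^= t;
        *b ^= t << n;
    }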
- ldr q8, M0SR - ldr q24, SR - - eor v10.16b, v0.16b, v9.16b // xor with round0 key - eor v11.16b, v1.16b, v9.16b - tbl v0.16b, {v10.16b}, v8.16b - eor v12.16b, v2.16b, v9.16b - tbl v1.16b, {v11.16b}, v8.16b - eor v13.16b, v3.16b, v9.16b - tbl v2.16b, {v12.16b}, v8.16b - eor v14.16b, v4.16b, v9.16b - tbl v3.16b, {v13.16b}, v8.16b - eor v15.16b, v5.16b, v9.16b - tbl v4.16b, {v14.16b}, v8.16b - eor v10.16b, v6.16b, v9.16b - tbl v5.16b, {v15.16b}, v8.16b - eor v11.16b, v7.16b, v9.16b - tbl v6.16b, {v10.16b}, v8.16b - tbl v7.16b, {v11.16b}, v8.16b - - bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 - - sub rounds, rounds, #1 - b .Lenc_sbox - -.Lenc_loop: - shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 -.Lenc_sbox: - sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ - v13, v14, v15 - subs rounds, rounds, #1 - b.cc .Lenc_done - - enc_next_rk - - mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ - v13, v14, v15 - - add_round_key v0, v1, v2, v3, v4, v5, v6, v7 - - b.ne .Lenc_loop - ldr q24, SRM0 - b .Lenc_loop - -.Lenc_done: - ldr q12, [bskey] // last round key - - bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 - - eor v0.16b, v0.16b, v12.16b - eor v1.16b, v1.16b, v12.16b - eor v4.16b, v4.16b, v12.16b - eor v6.16b, v6.16b, v12.16b - eor v3.16b, v3.16b, v12.16b - eor v7.16b, v7.16b, v12.16b - eor v2.16b, v2.16b, v12.16b - eor v5.16b, v5.16b, v12.16b - ret -ENDPROC(aesbs_encrypt8) - - .align 4 -aesbs_decrypt8: - lsl x9, rounds, #7 - add bskey, bskey, x9 - - ldr q9, [bskey, #-112]! // round 0 key - ldr q8, M0ISR - ldr q24, ISR - - eor v10.16b, v0.16b, v9.16b // xor with round0 key - eor v11.16b, v1.16b, v9.16b - tbl v0.16b, {v10.16b}, v8.16b - eor v12.16b, v2.16b, v9.16b - tbl v1.16b, {v11.16b}, v8.16b - eor v13.16b, v3.16b, v9.16b - tbl v2.16b, {v12.16b}, v8.16b - eor v14.16b, v4.16b, v9.16b - tbl v3.16b, {v13.16b}, v8.16b - eor v15.16b, v5.16b, v9.16b - tbl v4.16b, {v14.16b}, v8.16b - eor v10.16b, v6.16b, v9.16b - tbl v5.16b, {v15.16b}, v8.16b - eor v11.16b, v7.16b, v9.16b - tbl v6.16b, {v10.16b}, v8.16b - tbl v7.16b, {v11.16b}, v8.16b - - bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 - - sub rounds, rounds, #1 - b .Ldec_sbox - -.Ldec_loop: - shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 -.Ldec_sbox: - inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ - v13, v14, v15 - subs rounds, rounds, #1 - b.cc .Ldec_done - - dec_next_rk - - add_round_key v0, v1, v6, v4, v2, v7, v3, v5 - - inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ - v13, v14, v15 - - b.ne .Ldec_loop - ldr q24, ISRM0 - b .Ldec_loop -.Ldec_done: - ldr q12, [bskey, #-16] // last round key - - bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 - - eor v0.16b, v0.16b, v12.16b - eor v1.16b, v1.16b, v12.16b - eor v6.16b, v6.16b, v12.16b - eor v4.16b, v4.16b, v12.16b - eor v2.16b, v2.16b, v12.16b - eor v7.16b, v7.16b, v12.16b - eor v3.16b, v3.16b, v12.16b - eor v5.16b, v5.16b, v12.16b - ret -ENDPROC(aesbs_decrypt8) - - /* - * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks) - */ - .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 5 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - -99: mov x5, #1 - lsl x5, x5, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x5, x5, xzr, mi - - ld1 {v0.16b}, [x20], #16 - tbnz x5, #1, 0f - ld1 {v1.16b}, [x20], #16 - tbnz x5, #2, 0f - 
ld1 {v2.16b}, [x20], #16 - tbnz x5, #3, 0f - ld1 {v3.16b}, [x20], #16 - tbnz x5, #4, 0f - ld1 {v4.16b}, [x20], #16 - tbnz x5, #5, 0f - ld1 {v5.16b}, [x20], #16 - tbnz x5, #6, 0f - ld1 {v6.16b}, [x20], #16 - tbnz x5, #7, 0f - ld1 {v7.16b}, [x20], #16 - -0: mov bskey, x21 - mov rounds, x22 - bl \do8 - - st1 {\o0\().16b}, [x19], #16 - tbnz x5, #1, 1f - st1 {\o1\().16b}, [x19], #16 - tbnz x5, #2, 1f - st1 {\o2\().16b}, [x19], #16 - tbnz x5, #3, 1f - st1 {\o3\().16b}, [x19], #16 - tbnz x5, #4, 1f - st1 {\o4\().16b}, [x19], #16 - tbnz x5, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x5, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x5, #7, 1f - st1 {\o7\().16b}, [x19], #16 - - cbz x23, 1f - cond_yield_neon - b 99b - -1: frame_pop - ret - .endm - - .align 4 -ENTRY(aesbs_ecb_encrypt) - __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_ecb_encrypt) - - .align 4 -ENTRY(aesbs_ecb_decrypt) - __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_ecb_decrypt) - - /* - * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - */ - .align 4 -ENTRY(aesbs_cbc_decrypt) - frame_push 6 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - -99: mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi - - ld1 {v0.16b}, [x20], #16 - mov v25.16b, v0.16b - tbnz x6, #1, 0f - ld1 {v1.16b}, [x20], #16 - mov v26.16b, v1.16b - tbnz x6, #2, 0f - ld1 {v2.16b}, [x20], #16 - mov v27.16b, v2.16b - tbnz x6, #3, 0f - ld1 {v3.16b}, [x20], #16 - mov v28.16b, v3.16b - tbnz x6, #4, 0f - ld1 {v4.16b}, [x20], #16 - mov v29.16b, v4.16b - tbnz x6, #5, 0f - ld1 {v5.16b}, [x20], #16 - mov v30.16b, v5.16b - tbnz x6, #6, 0f - ld1 {v6.16b}, [x20], #16 - mov v31.16b, v6.16b - tbnz x6, #7, 0f - ld1 {v7.16b}, [x20] - -0: mov bskey, x21 - mov rounds, x22 - bl aesbs_decrypt8 - - ld1 {v24.16b}, [x24] // load IV - - eor v1.16b, v1.16b, v25.16b - eor v6.16b, v6.16b, v26.16b - eor v4.16b, v4.16b, v27.16b - eor v2.16b, v2.16b, v28.16b - eor v7.16b, v7.16b, v29.16b - eor v0.16b, v0.16b, v24.16b - eor v3.16b, v3.16b, v30.16b - eor v5.16b, v5.16b, v31.16b - - st1 {v0.16b}, [x19], #16 - mov v24.16b, v25.16b - tbnz x6, #1, 1f - st1 {v1.16b}, [x19], #16 - mov v24.16b, v26.16b - tbnz x6, #2, 1f - st1 {v6.16b}, [x19], #16 - mov v24.16b, v27.16b - tbnz x6, #3, 1f - st1 {v4.16b}, [x19], #16 - mov v24.16b, v28.16b - tbnz x6, #4, 1f - st1 {v2.16b}, [x19], #16 - mov v24.16b, v29.16b - tbnz x6, #5, 1f - st1 {v7.16b}, [x19], #16 - mov v24.16b, v30.16b - tbnz x6, #6, 1f - st1 {v3.16b}, [x19], #16 - mov v24.16b, v31.16b - tbnz x6, #7, 1f - ld1 {v24.16b}, [x20], #16 - st1 {v5.16b}, [x19], #16 -1: st1 {v24.16b}, [x24] // store IV - - cbz x23, 2f - cond_yield_neon - b 99b - -2: frame_pop - ret -ENDPROC(aesbs_cbc_decrypt) - - .macro next_tweak, out, in, const, tmp - sshr \tmp\().2d, \in\().2d, #63 - and \tmp\().16b, \tmp\().16b, \const\().16b - add \out\().2d, \in\().2d, \in\().2d - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \out\().16b, \out\().16b, \tmp\().16b - .endm - - /* - * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int blocks, u8 iv[]) - */ -__xts_crypt8: - mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi - - ld1 {v0.16b}, [x20], #16 - next_tweak v26, v25, v30, v31 - eor v0.16b, v0.16b, v25.16b - tbnz x6, #1, 0f - - ld1 
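__ecb_crypt and the other bulk entry points above size each eight-block batch with a single shift: when fewer than eight blocks remain, (1 << blocks) has exactly bit 'blocks' set, so testing bit i with tbnz after loading block i-1 cuts the load (and later store) sequence off at the right point; when eight or more remain, the csel forces the mask to zero and all eight loads run. The same idea in C:

    /* mask for a partial batch: bit k set means "stop after k blocks" */
    static int batch_mask(int blocks_left)
    {
        return blocks_left >= 8 ? 0 : 1 << blocks_left;
    }

    /* usage: load block 0; if (mask & 2) stop; load block 1; if (mask & 4) ... */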
{v1.16b}, [x20], #16 - next_tweak v27, v26, v30, v31 - eor v1.16b, v1.16b, v26.16b - tbnz x6, #2, 0f - - ld1 {v2.16b}, [x20], #16 - next_tweak v28, v27, v30, v31 - eor v2.16b, v2.16b, v27.16b - tbnz x6, #3, 0f - - ld1 {v3.16b}, [x20], #16 - next_tweak v29, v28, v30, v31 - eor v3.16b, v3.16b, v28.16b - tbnz x6, #4, 0f - - ld1 {v4.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset] - eor v4.16b, v4.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #5, 0f - - ld1 {v5.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 16] - eor v5.16b, v5.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #6, 0f - - ld1 {v6.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 32] - eor v6.16b, v6.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #7, 0f - - ld1 {v7.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 48] - eor v7.16b, v7.16b, v29.16b - next_tweak v29, v29, v30, v31 - -0: mov bskey, x21 - mov rounds, x22 - br x7 -ENDPROC(__xts_crypt8) - - .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 6, 64 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - -0: movi v30.2s, #0x1 - movi v25.2s, #0x87 - uzp1 v30.4s, v30.4s, v25.4s - ld1 {v25.16b}, [x24] - -99: adr x7, \do8 - bl __xts_crypt8 - - ldp q16, q17, [sp, #.Lframe_local_offset] - ldp q18, q19, [sp, #.Lframe_local_offset + 32] - - eor \o0\().16b, \o0\().16b, v25.16b - eor \o1\().16b, \o1\().16b, v26.16b - eor \o2\().16b, \o2\().16b, v27.16b - eor \o3\().16b, \o3\().16b, v28.16b - - st1 {\o0\().16b}, [x19], #16 - mov v25.16b, v26.16b - tbnz x6, #1, 1f - st1 {\o1\().16b}, [x19], #16 - mov v25.16b, v27.16b - tbnz x6, #2, 1f - st1 {\o2\().16b}, [x19], #16 - mov v25.16b, v28.16b - tbnz x6, #3, 1f - st1 {\o3\().16b}, [x19], #16 - mov v25.16b, v29.16b - tbnz x6, #4, 1f - - eor \o4\().16b, \o4\().16b, v16.16b - eor \o5\().16b, \o5\().16b, v17.16b - eor \o6\().16b, \o6\().16b, v18.16b - eor \o7\().16b, \o7\().16b, v19.16b - - st1 {\o4\().16b}, [x19], #16 - tbnz x6, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x6, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x6, #7, 1f - st1 {\o7\().16b}, [x19], #16 - - cbz x23, 1f - st1 {v25.16b}, [x24] - - cond_yield_neon 0b - b 99b - -1: st1 {v25.16b}, [x24] - frame_pop - ret - .endm - -ENTRY(aesbs_xts_encrypt) - __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 -ENDPROC(aesbs_xts_encrypt) - -ENTRY(aesbs_xts_decrypt) - __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 -ENDPROC(aesbs_xts_decrypt) - - .macro next_ctr, v - mov \v\().d[1], x8 - adds x8, x8, #1 - mov \v\().d[0], x7 - adc x7, x7, xzr - rev64 \v\().16b, \v\().16b - .endm - - /* - * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[], u8 final[]) - */ -ENTRY(aesbs_ctr_encrypt) - frame_push 8 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 - - cmp x25, #0 - cset x26, ne - add x23, x23, x26 // do one extra block if final - -98: ldp x7, x8, [x24] - ld1 {v0.16b}, [x24] -CPU_LE( rev x7, x7 ) -CPU_LE( rev x8, x8 ) - adds x8, x8, #1 - adc x7, x7, xzr - -99: mov x9, #1 - lsl x9, x9, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x9, x9, xzr, le - - tbnz x9, #1, 0f - next_ctr v1 - tbnz x9, #2, 0f - next_ctr v2 - tbnz x9, #3, 0f - next_ctr v3 - tbnz x9, #4, 0f - next_ctr v4 - tbnz x9, #5, 0f - next_ctr v5 - tbnz x9, #6, 0f - next_ctr v6 - tbnz x9, #7, 0f - next_ctr v7 - -0: mov bskey, x21 - mov rounds, x22 - bl aesbs_encrypt8 - - lsr x9, x9, x26 // disregard the extra 
block - tbnz x9, #0, 0f - - ld1 {v8.16b}, [x20], #16 - eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x19], #16 - tbnz x9, #1, 1f - - ld1 {v9.16b}, [x20], #16 - eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x19], #16 - tbnz x9, #2, 2f - - ld1 {v10.16b}, [x20], #16 - eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x19], #16 - tbnz x9, #3, 3f - - ld1 {v11.16b}, [x20], #16 - eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x19], #16 - tbnz x9, #4, 4f - - ld1 {v12.16b}, [x20], #16 - eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x19], #16 - tbnz x9, #5, 5f - - ld1 {v13.16b}, [x20], #16 - eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x19], #16 - tbnz x9, #6, 6f - - ld1 {v14.16b}, [x20], #16 - eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x19], #16 - tbnz x9, #7, 7f - - ld1 {v15.16b}, [x20], #16 - eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x19], #16 - -8: next_ctr v0 - st1 {v0.16b}, [x24] - cbz x23, .Lctr_done - - cond_yield_neon 98b - b 99b - -.Lctr_done: - frame_pop - ret - - /* - * If we are handling the tail of the input (x6 != NULL), return the - * final keystream block back to the caller. - */ -0: cbz x25, 8b - st1 {v0.16b}, [x25] - b 8b -1: cbz x25, 8b - st1 {v1.16b}, [x25] - b 8b -2: cbz x25, 8b - st1 {v4.16b}, [x25] - b 8b -3: cbz x25, 8b - st1 {v6.16b}, [x25] - b 8b -4: cbz x25, 8b - st1 {v3.16b}, [x25] - b 8b -5: cbz x25, 8b - st1 {v7.16b}, [x25] - b 8b -6: cbz x25, 8b - st1 {v2.16b}, [x25] - b 8b -7: cbz x25, 8b - st1 {v5.16b}, [x25] - b 8b -ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S deleted file mode 100644 index 706c4e10e9e294c7c5de49dbbe7a784ec7ca1458..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/chacha-neon-core.S +++ /dev/null @@ -1,860 +0,0 @@ -/* - * ChaCha/XChaCha NEON helper functions - * - * Copyright (C) 2016-2018 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Originally based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include - - .text - .align 6 - -/* - * chacha_permute - permute one block - * - * Permute one 64-byte block where the state matrix is stored in the four NEON - * registers v0-v3. It performs matrix operations on four words in parallel, - * but requires shuffling to rearrange the words after each round. - * - * The round count is given in w3. 
- * - * Clobbers: w3, x10, v4, v12 - */ -chacha_permute: - - adr_l x10, ROT8 - ld1 {v12.4s}, [x10] - -.Ldoubleround: - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - tbl v3.16b, {v3.16b}, v12.16b - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(0, 3, 2, 1)) - ext v1.16b, v1.16b, v1.16b, #4 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(2, 1, 0, 3)) - ext v3.16b, v3.16b, v3.16b, #12 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 16) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - rev32 v3.8h, v3.8h - - // x2 += x3, x1 = rotl32(x1 ^ x2, 12) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #12 - sri v1.4s, v4.4s, #20 - - // x0 += x1, x3 = rotl32(x3 ^ x0, 8) - add v0.4s, v0.4s, v1.4s - eor v3.16b, v3.16b, v0.16b - tbl v3.16b, {v3.16b}, v12.16b - - // x2 += x3, x1 = rotl32(x1 ^ x2, 7) - add v2.4s, v2.4s, v3.4s - eor v4.16b, v1.16b, v2.16b - shl v1.4s, v4.4s, #7 - sri v1.4s, v4.4s, #25 - - // x1 = shuffle32(x1, MASK(2, 1, 0, 3)) - ext v1.16b, v1.16b, v1.16b, #12 - // x2 = shuffle32(x2, MASK(1, 0, 3, 2)) - ext v2.16b, v2.16b, v2.16b, #8 - // x3 = shuffle32(x3, MASK(0, 3, 2, 1)) - ext v3.16b, v3.16b, v3.16b, #4 - - subs w3, w3, #2 - b.ne .Ldoubleround - - ret -ENDPROC(chacha_permute) - -ENTRY(chacha_block_xor_neon) - // x0: Input state matrix, s - // x1: 1 data block output, o - // x2: 1 data block input, i - // w3: nrounds - - stp x29, x30, [sp, #-16]! - mov x29, sp - - // x0..3 = s0..3 - ld1 {v0.4s-v3.4s}, [x0] - ld1 {v8.4s-v11.4s}, [x0] - - bl chacha_permute - - ld1 {v4.16b-v7.16b}, [x2] - - // o0 = i0 ^ (x0 + s0) - add v0.4s, v0.4s, v8.4s - eor v0.16b, v0.16b, v4.16b - - // o1 = i1 ^ (x1 + s1) - add v1.4s, v1.4s, v9.4s - eor v1.16b, v1.16b, v5.16b - - // o2 = i2 ^ (x2 + s2) - add v2.4s, v2.4s, v10.4s - eor v2.16b, v2.16b, v6.16b - - // o3 = i3 ^ (x3 + s3) - add v3.4s, v3.4s, v11.4s - eor v3.16b, v3.16b, v7.16b - - st1 {v0.16b-v3.16b}, [x1] - - ldp x29, x30, [sp], #16 - ret -ENDPROC(chacha_block_xor_neon) - -ENTRY(hchacha_block_neon) - // x0: Input state matrix, s - // x1: output (8 32-bit words) - // w2: nrounds - - stp x29, x30, [sp, #-16]! - mov x29, sp - - ld1 {v0.4s-v3.4s}, [x0] - - mov w3, w2 - bl chacha_permute - - st1 {v0.4s}, [x1], #16 - st1 {v3.4s}, [x1] - - ldp x29, x30, [sp], #16 - ret -ENDPROC(hchacha_block_neon) - - a0 .req w12 - a1 .req w13 - a2 .req w14 - a3 .req w15 - a4 .req w16 - a5 .req w17 - a6 .req w19 - a7 .req w20 - a8 .req w21 - a9 .req w22 - a10 .req w23 - a11 .req w24 - a12 .req w25 - a13 .req w26 - a14 .req w27 - a15 .req w28 - - .align 6 -ENTRY(chacha_4block_xor_neon) - frame_push 10 - - // x0: Input state matrix, s - // x1: 4 data blocks output, o - // x2: 4 data blocks input, i - // w3: nrounds - // x4: byte count - - adr_l x10, .Lpermute - and x5, x4, #63 - add x10, x10, x5 - add x11, x10, #64 - - // - // This function encrypts four consecutive ChaCha blocks by loading - // the state matrix in NEON registers four times. The algorithm performs - // each operation on the corresponding word of each state matrix, hence - // requires no word shuffling. 
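.Ldoubleround above is the standard ChaCha quarter-round applied to whole rows at once: add, xor, rotate by 16, 12, 8 and 7 bits, with the 16-bit rotate done as rev32 and the 8-bit one as a tbl byte shuffle through the ROT8 constant. The scalar definition it vectorises:

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, unsigned int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    static void chacha_quarter_round(uint32_t *a, uint32_t *b,
                                     uint32_t *c, uint32_t *d)
    {
        *a += *b; *d = rotl32(*d ^ *a, 16);
        *c += *d; *b = rotl32(*b ^ *c, 12);
        *a += *b; *d = rotl32(*d ^ *a, 8);
        *c += *d; *b = rotl32(*b ^ *c, 7);
    }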
For final XORing step we transpose the - // matrix by interleaving 32- and then 64-bit words, which allows us to - // do XOR in NEON registers. - // - // At the same time, a fifth block is encrypted in parallel using - // scalar registers - // - adr_l x9, CTRINC // ... and ROT8 - ld1 {v30.4s-v31.4s}, [x9] - - // x0..15[0-3] = s0..3[0..3] - add x8, x0, #16 - ld4r { v0.4s- v3.4s}, [x0] - ld4r { v4.4s- v7.4s}, [x8], #16 - ld4r { v8.4s-v11.4s}, [x8], #16 - ld4r {v12.4s-v15.4s}, [x8] - - mov a0, v0.s[0] - mov a1, v1.s[0] - mov a2, v2.s[0] - mov a3, v3.s[0] - mov a4, v4.s[0] - mov a5, v5.s[0] - mov a6, v6.s[0] - mov a7, v7.s[0] - mov a8, v8.s[0] - mov a9, v9.s[0] - mov a10, v10.s[0] - mov a11, v11.s[0] - mov a12, v12.s[0] - mov a13, v13.s[0] - mov a14, v14.s[0] - mov a15, v15.s[0] - - // x12 += counter values 1-4 - add v12.4s, v12.4s, v30.4s - -.Ldoubleround4: - // x0 += x4, x12 = rotl32(x12 ^ x0, 16) - // x1 += x5, x13 = rotl32(x13 ^ x1, 16) - // x2 += x6, x14 = rotl32(x14 ^ x2, 16) - // x3 += x7, x15 = rotl32(x15 ^ x3, 16) - add v0.4s, v0.4s, v4.4s - add a0, a0, a4 - add v1.4s, v1.4s, v5.4s - add a1, a1, a5 - add v2.4s, v2.4s, v6.4s - add a2, a2, a6 - add v3.4s, v3.4s, v7.4s - add a3, a3, a7 - - eor v12.16b, v12.16b, v0.16b - eor a12, a12, a0 - eor v13.16b, v13.16b, v1.16b - eor a13, a13, a1 - eor v14.16b, v14.16b, v2.16b - eor a14, a14, a2 - eor v15.16b, v15.16b, v3.16b - eor a15, a15, a3 - - rev32 v12.8h, v12.8h - ror a12, a12, #16 - rev32 v13.8h, v13.8h - ror a13, a13, #16 - rev32 v14.8h, v14.8h - ror a14, a14, #16 - rev32 v15.8h, v15.8h - ror a15, a15, #16 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 12) - // x9 += x13, x5 = rotl32(x5 ^ x9, 12) - // x10 += x14, x6 = rotl32(x6 ^ x10, 12) - // x11 += x15, x7 = rotl32(x7 ^ x11, 12) - add v8.4s, v8.4s, v12.4s - add a8, a8, a12 - add v9.4s, v9.4s, v13.4s - add a9, a9, a13 - add v10.4s, v10.4s, v14.4s - add a10, a10, a14 - add v11.4s, v11.4s, v15.4s - add a11, a11, a15 - - eor v16.16b, v4.16b, v8.16b - eor a4, a4, a8 - eor v17.16b, v5.16b, v9.16b - eor a5, a5, a9 - eor v18.16b, v6.16b, v10.16b - eor a6, a6, a10 - eor v19.16b, v7.16b, v11.16b - eor a7, a7, a11 - - shl v4.4s, v16.4s, #12 - shl v5.4s, v17.4s, #12 - shl v6.4s, v18.4s, #12 - shl v7.4s, v19.4s, #12 - - sri v4.4s, v16.4s, #20 - ror a4, a4, #20 - sri v5.4s, v17.4s, #20 - ror a5, a5, #20 - sri v6.4s, v18.4s, #20 - ror a6, a6, #20 - sri v7.4s, v19.4s, #20 - ror a7, a7, #20 - - // x0 += x4, x12 = rotl32(x12 ^ x0, 8) - // x1 += x5, x13 = rotl32(x13 ^ x1, 8) - // x2 += x6, x14 = rotl32(x14 ^ x2, 8) - // x3 += x7, x15 = rotl32(x15 ^ x3, 8) - add v0.4s, v0.4s, v4.4s - add a0, a0, a4 - add v1.4s, v1.4s, v5.4s - add a1, a1, a5 - add v2.4s, v2.4s, v6.4s - add a2, a2, a6 - add v3.4s, v3.4s, v7.4s - add a3, a3, a7 - - eor v12.16b, v12.16b, v0.16b - eor a12, a12, a0 - eor v13.16b, v13.16b, v1.16b - eor a13, a13, a1 - eor v14.16b, v14.16b, v2.16b - eor a14, a14, a2 - eor v15.16b, v15.16b, v3.16b - eor a15, a15, a3 - - tbl v12.16b, {v12.16b}, v31.16b - ror a12, a12, #24 - tbl v13.16b, {v13.16b}, v31.16b - ror a13, a13, #24 - tbl v14.16b, {v14.16b}, v31.16b - ror a14, a14, #24 - tbl v15.16b, {v15.16b}, v31.16b - ror a15, a15, #24 - - // x8 += x12, x4 = rotl32(x4 ^ x8, 7) - // x9 += x13, x5 = rotl32(x5 ^ x9, 7) - // x10 += x14, x6 = rotl32(x6 ^ x10, 7) - // x11 += x15, x7 = rotl32(x7 ^ x11, 7) - add v8.4s, v8.4s, v12.4s - add a8, a8, a12 - add v9.4s, v9.4s, v13.4s - add a9, a9, a13 - add v10.4s, v10.4s, v14.4s - add a10, a10, a14 - add v11.4s, v11.4s, v15.4s - add a11, a11, a15 - - eor v16.16b, v4.16b, v8.16b 
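The four-block kernel above uses the other classic SIMD layout: one vector register per state word with one lane per block, so the quarter-round needs no in-register shuffles and only the counter word differs across lanes, while a fifth block runs in the a0-a15 scalar aliases to keep both issue pipes busy. A sketch of that layout, assuming GCC/Clang vector extensions (the struct and function names are illustrative):

    #include <stdint.h>

    typedef uint32_t v4u32 __attribute__((vector_size(16)));

    struct chacha4 {
        v4u32 x[16];   /* x[i][lane] = word i of block 'lane' */
    };

    static void chacha4_init(struct chacha4 *st, const uint32_t s[16])
    {
        for (int i = 0; i < 16; i++)
            st->x[i] = (v4u32){ s[i], s[i], s[i], s[i] };
        /* consecutive per-lane counters; the code above offsets its
         * lanes by 1..4, since the scalar block takes counter + 0 */
        st->x[12] += (v4u32){ 0, 1, 2, 3 };
    }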
- eor a4, a4, a8 - eor v17.16b, v5.16b, v9.16b - eor a5, a5, a9 - eor v18.16b, v6.16b, v10.16b - eor a6, a6, a10 - eor v19.16b, v7.16b, v11.16b - eor a7, a7, a11 - - shl v4.4s, v16.4s, #7 - shl v5.4s, v17.4s, #7 - shl v6.4s, v18.4s, #7 - shl v7.4s, v19.4s, #7 - - sri v4.4s, v16.4s, #25 - ror a4, a4, #25 - sri v5.4s, v17.4s, #25 - ror a5, a5, #25 - sri v6.4s, v18.4s, #25 - ror a6, a6, #25 - sri v7.4s, v19.4s, #25 - ror a7, a7, #25 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 16) - // x1 += x6, x12 = rotl32(x12 ^ x1, 16) - // x2 += x7, x13 = rotl32(x13 ^ x2, 16) - // x3 += x4, x14 = rotl32(x14 ^ x3, 16) - add v0.4s, v0.4s, v5.4s - add a0, a0, a5 - add v1.4s, v1.4s, v6.4s - add a1, a1, a6 - add v2.4s, v2.4s, v7.4s - add a2, a2, a7 - add v3.4s, v3.4s, v4.4s - add a3, a3, a4 - - eor v15.16b, v15.16b, v0.16b - eor a15, a15, a0 - eor v12.16b, v12.16b, v1.16b - eor a12, a12, a1 - eor v13.16b, v13.16b, v2.16b - eor a13, a13, a2 - eor v14.16b, v14.16b, v3.16b - eor a14, a14, a3 - - rev32 v15.8h, v15.8h - ror a15, a15, #16 - rev32 v12.8h, v12.8h - ror a12, a12, #16 - rev32 v13.8h, v13.8h - ror a13, a13, #16 - rev32 v14.8h, v14.8h - ror a14, a14, #16 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 12) - // x11 += x12, x6 = rotl32(x6 ^ x11, 12) - // x8 += x13, x7 = rotl32(x7 ^ x8, 12) - // x9 += x14, x4 = rotl32(x4 ^ x9, 12) - add v10.4s, v10.4s, v15.4s - add a10, a10, a15 - add v11.4s, v11.4s, v12.4s - add a11, a11, a12 - add v8.4s, v8.4s, v13.4s - add a8, a8, a13 - add v9.4s, v9.4s, v14.4s - add a9, a9, a14 - - eor v16.16b, v5.16b, v10.16b - eor a5, a5, a10 - eor v17.16b, v6.16b, v11.16b - eor a6, a6, a11 - eor v18.16b, v7.16b, v8.16b - eor a7, a7, a8 - eor v19.16b, v4.16b, v9.16b - eor a4, a4, a9 - - shl v5.4s, v16.4s, #12 - shl v6.4s, v17.4s, #12 - shl v7.4s, v18.4s, #12 - shl v4.4s, v19.4s, #12 - - sri v5.4s, v16.4s, #20 - ror a5, a5, #20 - sri v6.4s, v17.4s, #20 - ror a6, a6, #20 - sri v7.4s, v18.4s, #20 - ror a7, a7, #20 - sri v4.4s, v19.4s, #20 - ror a4, a4, #20 - - // x0 += x5, x15 = rotl32(x15 ^ x0, 8) - // x1 += x6, x12 = rotl32(x12 ^ x1, 8) - // x2 += x7, x13 = rotl32(x13 ^ x2, 8) - // x3 += x4, x14 = rotl32(x14 ^ x3, 8) - add v0.4s, v0.4s, v5.4s - add a0, a0, a5 - add v1.4s, v1.4s, v6.4s - add a1, a1, a6 - add v2.4s, v2.4s, v7.4s - add a2, a2, a7 - add v3.4s, v3.4s, v4.4s - add a3, a3, a4 - - eor v15.16b, v15.16b, v0.16b - eor a15, a15, a0 - eor v12.16b, v12.16b, v1.16b - eor a12, a12, a1 - eor v13.16b, v13.16b, v2.16b - eor a13, a13, a2 - eor v14.16b, v14.16b, v3.16b - eor a14, a14, a3 - - tbl v15.16b, {v15.16b}, v31.16b - ror a15, a15, #24 - tbl v12.16b, {v12.16b}, v31.16b - ror a12, a12, #24 - tbl v13.16b, {v13.16b}, v31.16b - ror a13, a13, #24 - tbl v14.16b, {v14.16b}, v31.16b - ror a14, a14, #24 - - // x10 += x15, x5 = rotl32(x5 ^ x10, 7) - // x11 += x12, x6 = rotl32(x6 ^ x11, 7) - // x8 += x13, x7 = rotl32(x7 ^ x8, 7) - // x9 += x14, x4 = rotl32(x4 ^ x9, 7) - add v10.4s, v10.4s, v15.4s - add a10, a10, a15 - add v11.4s, v11.4s, v12.4s - add a11, a11, a12 - add v8.4s, v8.4s, v13.4s - add a8, a8, a13 - add v9.4s, v9.4s, v14.4s - add a9, a9, a14 - - eor v16.16b, v5.16b, v10.16b - eor a5, a5, a10 - eor v17.16b, v6.16b, v11.16b - eor a6, a6, a11 - eor v18.16b, v7.16b, v8.16b - eor a7, a7, a8 - eor v19.16b, v4.16b, v9.16b - eor a4, a4, a9 - - shl v5.4s, v16.4s, #7 - shl v6.4s, v17.4s, #7 - shl v7.4s, v18.4s, #7 - shl v4.4s, v19.4s, #7 - - sri v5.4s, v16.4s, #25 - ror a5, a5, #25 - sri v6.4s, v17.4s, #25 - ror a6, a6, #25 - sri v7.4s, v18.4s, #25 - ror a7, a7, #25 - sri v4.4s, v19.4s, #25 - ror a4, a4, 
#25 - - subs w3, w3, #2 - b.ne .Ldoubleround4 - - ld4r {v16.4s-v19.4s}, [x0], #16 - ld4r {v20.4s-v23.4s}, [x0], #16 - - // x12 += counter values 0-3 - add v12.4s, v12.4s, v30.4s - - // x0[0-3] += s0[0] - // x1[0-3] += s0[1] - // x2[0-3] += s0[2] - // x3[0-3] += s0[3] - add v0.4s, v0.4s, v16.4s - mov w6, v16.s[0] - mov w7, v17.s[0] - add v1.4s, v1.4s, v17.4s - mov w8, v18.s[0] - mov w9, v19.s[0] - add v2.4s, v2.4s, v18.4s - add a0, a0, w6 - add a1, a1, w7 - add v3.4s, v3.4s, v19.4s - add a2, a2, w8 - add a3, a3, w9 -CPU_BE( rev a0, a0 ) -CPU_BE( rev a1, a1 ) -CPU_BE( rev a2, a2 ) -CPU_BE( rev a3, a3 ) - - ld4r {v24.4s-v27.4s}, [x0], #16 - ld4r {v28.4s-v31.4s}, [x0] - - // x4[0-3] += s1[0] - // x5[0-3] += s1[1] - // x6[0-3] += s1[2] - // x7[0-3] += s1[3] - add v4.4s, v4.4s, v20.4s - mov w6, v20.s[0] - mov w7, v21.s[0] - add v5.4s, v5.4s, v21.4s - mov w8, v22.s[0] - mov w9, v23.s[0] - add v6.4s, v6.4s, v22.4s - add a4, a4, w6 - add a5, a5, w7 - add v7.4s, v7.4s, v23.4s - add a6, a6, w8 - add a7, a7, w9 -CPU_BE( rev a4, a4 ) -CPU_BE( rev a5, a5 ) -CPU_BE( rev a6, a6 ) -CPU_BE( rev a7, a7 ) - - // x8[0-3] += s2[0] - // x9[0-3] += s2[1] - // x10[0-3] += s2[2] - // x11[0-3] += s2[3] - add v8.4s, v8.4s, v24.4s - mov w6, v24.s[0] - mov w7, v25.s[0] - add v9.4s, v9.4s, v25.4s - mov w8, v26.s[0] - mov w9, v27.s[0] - add v10.4s, v10.4s, v26.4s - add a8, a8, w6 - add a9, a9, w7 - add v11.4s, v11.4s, v27.4s - add a10, a10, w8 - add a11, a11, w9 -CPU_BE( rev a8, a8 ) -CPU_BE( rev a9, a9 ) -CPU_BE( rev a10, a10 ) -CPU_BE( rev a11, a11 ) - - // x12[0-3] += s3[0] - // x13[0-3] += s3[1] - // x14[0-3] += s3[2] - // x15[0-3] += s3[3] - add v12.4s, v12.4s, v28.4s - mov w6, v28.s[0] - mov w7, v29.s[0] - add v13.4s, v13.4s, v29.4s - mov w8, v30.s[0] - mov w9, v31.s[0] - add v14.4s, v14.4s, v30.4s - add a12, a12, w6 - add a13, a13, w7 - add v15.4s, v15.4s, v31.4s - add a14, a14, w8 - add a15, a15, w9 -CPU_BE( rev a12, a12 ) -CPU_BE( rev a13, a13 ) -CPU_BE( rev a14, a14 ) -CPU_BE( rev a15, a15 ) - - // interleave 32-bit words in state n, n+1 - ldp w6, w7, [x2], #64 - zip1 v16.4s, v0.4s, v1.4s - ldp w8, w9, [x2, #-56] - eor a0, a0, w6 - zip2 v17.4s, v0.4s, v1.4s - eor a1, a1, w7 - zip1 v18.4s, v2.4s, v3.4s - eor a2, a2, w8 - zip2 v19.4s, v2.4s, v3.4s - eor a3, a3, w9 - ldp w6, w7, [x2, #-48] - zip1 v20.4s, v4.4s, v5.4s - ldp w8, w9, [x2, #-40] - eor a4, a4, w6 - zip2 v21.4s, v4.4s, v5.4s - eor a5, a5, w7 - zip1 v22.4s, v6.4s, v7.4s - eor a6, a6, w8 - zip2 v23.4s, v6.4s, v7.4s - eor a7, a7, w9 - ldp w6, w7, [x2, #-32] - zip1 v24.4s, v8.4s, v9.4s - ldp w8, w9, [x2, #-24] - eor a8, a8, w6 - zip2 v25.4s, v8.4s, v9.4s - eor a9, a9, w7 - zip1 v26.4s, v10.4s, v11.4s - eor a10, a10, w8 - zip2 v27.4s, v10.4s, v11.4s - eor a11, a11, w9 - ldp w6, w7, [x2, #-16] - zip1 v28.4s, v12.4s, v13.4s - ldp w8, w9, [x2, #-8] - eor a12, a12, w6 - zip2 v29.4s, v12.4s, v13.4s - eor a13, a13, w7 - zip1 v30.4s, v14.4s, v15.4s - eor a14, a14, w8 - zip2 v31.4s, v14.4s, v15.4s - eor a15, a15, w9 - - mov x3, #64 - subs x5, x4, #128 - add x6, x5, x2 - csel x3, x3, xzr, ge - csel x2, x2, x6, ge - - // interleave 64-bit words in state n, n+2 - zip1 v0.2d, v16.2d, v18.2d - zip2 v4.2d, v16.2d, v18.2d - stp a0, a1, [x1], #64 - zip1 v8.2d, v17.2d, v19.2d - zip2 v12.2d, v17.2d, v19.2d - stp a2, a3, [x1, #-56] - ld1 {v16.16b-v19.16b}, [x2], x3 - - subs x6, x4, #192 - ccmp x3, xzr, #4, lt - add x7, x6, x2 - csel x3, x3, xzr, eq - csel x2, x2, x7, eq - - zip1 v1.2d, v20.2d, v22.2d - zip2 v5.2d, v20.2d, v22.2d - stp a4, a5, [x1, #-48] - zip1 v9.2d, v21.2d, 
v23.2d - zip2 v13.2d, v21.2d, v23.2d - stp a6, a7, [x1, #-40] - ld1 {v20.16b-v23.16b}, [x2], x3 - - subs x7, x4, #256 - ccmp x3, xzr, #4, lt - add x8, x7, x2 - csel x3, x3, xzr, eq - csel x2, x2, x8, eq - - zip1 v2.2d, v24.2d, v26.2d - zip2 v6.2d, v24.2d, v26.2d - stp a8, a9, [x1, #-32] - zip1 v10.2d, v25.2d, v27.2d - zip2 v14.2d, v25.2d, v27.2d - stp a10, a11, [x1, #-24] - ld1 {v24.16b-v27.16b}, [x2], x3 - - subs x8, x4, #320 - ccmp x3, xzr, #4, lt - add x9, x8, x2 - csel x2, x2, x9, eq - - zip1 v3.2d, v28.2d, v30.2d - zip2 v7.2d, v28.2d, v30.2d - stp a12, a13, [x1, #-16] - zip1 v11.2d, v29.2d, v31.2d - zip2 v15.2d, v29.2d, v31.2d - stp a14, a15, [x1, #-8] - ld1 {v28.16b-v31.16b}, [x2] - - // xor with corresponding input, write to output - tbnz x5, #63, 0f - eor v16.16b, v16.16b, v0.16b - eor v17.16b, v17.16b, v1.16b - eor v18.16b, v18.16b, v2.16b - eor v19.16b, v19.16b, v3.16b - st1 {v16.16b-v19.16b}, [x1], #64 - cbz x5, .Lout - - tbnz x6, #63, 1f - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1], #64 - cbz x6, .Lout - - tbnz x7, #63, 2f - eor v24.16b, v24.16b, v8.16b - eor v25.16b, v25.16b, v9.16b - eor v26.16b, v26.16b, v10.16b - eor v27.16b, v27.16b, v11.16b - st1 {v24.16b-v27.16b}, [x1], #64 - cbz x7, .Lout - - tbnz x8, #63, 3f - eor v28.16b, v28.16b, v12.16b - eor v29.16b, v29.16b, v13.16b - eor v30.16b, v30.16b, v14.16b - eor v31.16b, v31.16b, v15.16b - st1 {v28.16b-v31.16b}, [x1] - -.Lout: frame_pop - ret - - // fewer than 128 bytes of in/output -0: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - sub x2, x1, #64 - add x1, x1, x5 - ld1 {v16.16b-v19.16b}, [x2] - tbl v4.16b, {v0.16b-v3.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v5.16b, {v0.16b-v3.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v6.16b, {v0.16b-v3.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v7.16b, {v0.16b-v3.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - - // fewer than 192 bytes of in/output -1: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - add x1, x1, x6 - tbl v0.16b, {v4.16b-v7.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v1.16b, {v4.16b-v7.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v2.16b, {v4.16b-v7.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v3.16b, {v4.16b-v7.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v0.16b - eor v21.16b, v21.16b, v1.16b - eor v22.16b, v22.16b, v2.16b - eor v23.16b, v23.16b, v3.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - - // fewer than 256 bytes of in/output -2: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x7 - tbl v0.16b, {v8.16b-v11.16b}, v4.16b - tbx v24.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v8.16b-v11.16b}, v4.16b - tbx v25.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, 
v6.16b - tbl v2.16b, {v8.16b-v11.16b}, v4.16b - tbx v26.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v8.16b-v11.16b}, v4.16b - tbx v27.16b, {v20.16b-v23.16b}, v5.16b - - eor v24.16b, v24.16b, v0.16b - eor v25.16b, v25.16b, v1.16b - eor v26.16b, v26.16b, v2.16b - eor v27.16b, v27.16b, v3.16b - st1 {v24.16b-v27.16b}, [x1] - b .Lout - - // fewer than 320 bytes of in/output -3: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x8 - tbl v0.16b, {v12.16b-v15.16b}, v4.16b - tbx v28.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v12.16b-v15.16b}, v4.16b - tbx v29.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v12.16b-v15.16b}, v4.16b - tbx v30.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v12.16b-v15.16b}, v4.16b - tbx v31.16b, {v24.16b-v27.16b}, v5.16b - - eor v28.16b, v28.16b, v0.16b - eor v29.16b, v29.16b, v1.16b - eor v30.16b, v30.16b, v2.16b - eor v31.16b, v31.16b, v3.16b - st1 {v28.16b-v31.16b}, [x1] - b .Lout -ENDPROC(chacha_4block_xor_neon) - - .section ".rodata", "a", %progbits - .align L1_CACHE_SHIFT -.Lpermute: - .set .Li, 0 - .rept 192 - .byte (.Li - 64) - .set .Li, .Li + 1 - .endr - -CTRINC: .word 1, 2, 3, 4 -ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S deleted file mode 100644 index e545b42e6a468aa296f8428aaabedc01156b9395..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ /dev/null @@ -1,536 +0,0 @@ -// -// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions -// -// Copyright (C) 2016 Linaro Ltd -// Copyright (C) 2019 Google LLC -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License version 2 as -// published by the Free Software Foundation. -// - -// Derived from the x86 version: -// -// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions -// -// Copyright (c) 2013, Intel Corporation -// -// Authors: -// Erdinc Ozturk -// Vinodh Gopal -// James Guilford -// Tim Chen -// -// This software is available to you under a choice of one of two -// licenses. You may choose to be licensed under the terms of the GNU -// General Public License (GPL) Version 2, available from the file -// COPYING in the main directory of this source tree, or the -// OpenIB.org BSD license below: -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the -// distribution. -// -// * Neither the name of the Intel Corporation nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
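All of the rotl32 comments threaded through the ChaCha rounds above describe the same scalar primitive; a minimal C sketch of that quarter-round (helper names here are illustrative, not from the kernel):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t v, int n)
    {
            return (v << n) | (v >> (32 - n));
    }

    /*
     * One ChaCha quarter-round, matching the "x0 += x5, x15 =
     * rotl32(x15 ^ x0, 16)" style comments above. The NEON code runs
     * four blocks of these in parallel while the scalar a0-a15
     * registers carry a fifth block.
     */
    static void chacha_quarter_round(uint32_t *a, uint32_t *b,
                                     uint32_t *c, uint32_t *d)
    {
            *a += *b; *d = rotl32(*d ^ *a, 16);
            *c += *d; *b = rotl32(*b ^ *c, 12);
            *a += *b; *d = rotl32(*d ^ *a, 8);
            *c += *d; *b = rotl32(*b ^ *c, 7);
    }

On the vector side the rotate amounts map to different tricks: 16 is a rev32 of 16-bit lanes, 8 is a tbl byte shuffle through the ROT8 mask above, and 12 and 7 fall back to shl/sri pairs.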
-// -// -// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Reference paper titled "Fast CRC Computation for Generic -// Polynomials Using PCLMULQDQ Instruction" -// URL: http://www.intel.com/content/dam/www/public/us/en/documents -// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -// - -#include -#include - - .text - .cpu generic+crypto - - init_crc .req w19 - buf .req x20 - len .req x21 - fold_consts_ptr .req x22 - - fold_consts .req v10 - - ad .req v14 - - k00_16 .req v15 - k32_48 .req v16 - - t3 .req v17 - t4 .req v18 - t5 .req v19 - t6 .req v20 - t7 .req v21 - t8 .req v22 - t9 .req v23 - - perm1 .req v24 - perm2 .req v25 - perm3 .req v26 - perm4 .req v27 - - bd1 .req v28 - bd2 .req v29 - bd3 .req v30 - bd4 .req v31 - - .macro __pmull_init_p64 - .endm - - .macro __pmull_pre_p64, bd - .endm - - .macro __pmull_init_p8 - // k00_16 := 0x0000000000000000_000000000000ffff - // k32_48 := 0x00000000ffffffff_0000ffffffffffff - movi k32_48.2d, #0xffffffff - mov k32_48.h[2], k32_48.h[0] - ushr k00_16.2d, k32_48.2d, #32 - - // prepare the permutation vectors - mov_q x5, 0x080f0e0d0c0b0a09 - movi perm4.8b, #8 - dup perm1.2d, x5 - eor perm1.16b, perm1.16b, perm4.16b - ushr perm2.2d, perm1.2d, #8 - ushr perm3.2d, perm1.2d, #16 - ushr perm4.2d, perm1.2d, #24 - sli perm2.2d, perm1.2d, #56 - sli perm3.2d, perm1.2d, #48 - sli perm4.2d, perm1.2d, #40 - .endm - - .macro __pmull_pre_p8, bd - tbl bd1.16b, {\bd\().16b}, perm1.16b - tbl bd2.16b, {\bd\().16b}, perm2.16b - tbl bd3.16b, {\bd\().16b}, perm3.16b - tbl bd4.16b, {\bd\().16b}, perm4.16b - .endm - -__pmull_p8_core: -.L__pmull_p8_core: - ext t4.8b, ad.8b, ad.8b, #1 // A1 - ext t5.8b, ad.8b, ad.8b, #2 // A2 - ext t6.8b, ad.8b, ad.8b, #3 // A3 - - pmull t4.8h, t4.8b, fold_consts.8b // F = A1*B - pmull t8.8h, ad.8b, bd1.8b // E = A*B1 - pmull t5.8h, t5.8b, fold_consts.8b // H = A2*B - pmull t7.8h, ad.8b, bd2.8b // G = A*B2 - pmull t6.8h, t6.8b, fold_consts.8b // J = A3*B - pmull t9.8h, ad.8b, bd3.8b // I = A*B3 - pmull t3.8h, ad.8b, bd4.8b // K = A*B4 - b 0f - -.L__pmull_p8_core2: - tbl t4.16b, {ad.16b}, perm1.16b // A1 - tbl t5.16b, {ad.16b}, perm2.16b // A2 - tbl t6.16b, {ad.16b}, perm3.16b // A3 - - pmull2 t4.8h, t4.16b, fold_consts.16b // F = A1*B - pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1 - pmull2 t5.8h, t5.16b, fold_consts.16b // H = A2*B - pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2 - pmull2 t6.8h, t6.16b, fold_consts.16b // J = A3*B - pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3 - pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4 - -0: eor t4.16b, t4.16b, t8.16b // L = E + F - eor t5.16b, t5.16b, t7.16b // M = G + H - eor t6.16b, t6.16b, t9.16b // N = I + J - - uzp1 t8.2d, t4.2d, t5.2d - uzp2 t4.2d, t4.2d, t5.2d - uzp1 t7.2d, t6.2d, t3.2d - uzp2 t6.2d, t6.2d, t3.2d - - // t4 = (L) (P0 + P1) << 8 - // t5 = (M) (P2 
+ P3) << 16 - eor t8.16b, t8.16b, t4.16b - and t4.16b, t4.16b, k32_48.16b - - // t6 = (N) (P4 + P5) << 24 - // t7 = (K) (P6 + P7) << 32 - eor t7.16b, t7.16b, t6.16b - and t6.16b, t6.16b, k00_16.16b - - eor t8.16b, t8.16b, t4.16b - eor t7.16b, t7.16b, t6.16b - - zip2 t5.2d, t8.2d, t4.2d - zip1 t4.2d, t8.2d, t4.2d - zip2 t3.2d, t7.2d, t6.2d - zip1 t6.2d, t7.2d, t6.2d - - ext t4.16b, t4.16b, t4.16b, #15 - ext t5.16b, t5.16b, t5.16b, #14 - ext t6.16b, t6.16b, t6.16b, #13 - ext t3.16b, t3.16b, t3.16b, #12 - - eor t4.16b, t4.16b, t5.16b - eor t6.16b, t6.16b, t3.16b - ret -ENDPROC(__pmull_p8_core) - - .macro __pmull_p8, rq, ad, bd, i - .ifnc \bd, fold_consts - .err - .endif - mov ad.16b, \ad\().16b - .ifb \i - pmull \rq\().8h, \ad\().8b, \bd\().8b // D = A*B - .else - pmull2 \rq\().8h, \ad\().16b, \bd\().16b // D = A*B - .endif - - bl .L__pmull_p8_core\i - - eor \rq\().16b, \rq\().16b, t4.16b - eor \rq\().16b, \rq\().16b, t6.16b - .endm - - // Fold reg1, reg2 into the next 32 data bytes, storing the result back - // into reg1, reg2. - .macro fold_32_bytes, p, reg1, reg2 - ldp q11, q12, [buf], #0x20 - - __pmull_\p v8, \reg1, fold_consts, 2 - __pmull_\p \reg1, \reg1, fold_consts - -CPU_LE( rev64 v11.16b, v11.16b ) -CPU_LE( rev64 v12.16b, v12.16b ) - - __pmull_\p v9, \reg2, fold_consts, 2 - __pmull_\p \reg2, \reg2, fold_consts - -CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 ) -CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) - - eor \reg1\().16b, \reg1\().16b, v8.16b - eor \reg2\().16b, \reg2\().16b, v9.16b - eor \reg1\().16b, \reg1\().16b, v11.16b - eor \reg2\().16b, \reg2\().16b, v12.16b - .endm - - // Fold src_reg into dst_reg, optionally loading the next fold constants - .macro fold_16_bytes, p, src_reg, dst_reg, load_next_consts - __pmull_\p v8, \src_reg, fold_consts - __pmull_\p \src_reg, \src_reg, fold_consts, 2 - .ifnb \load_next_consts - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - .endif - eor \dst_reg\().16b, \dst_reg\().16b, v8.16b - eor \dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b - .endm - - .macro __pmull_p64, rd, rn, rm, n - .ifb \n - pmull \rd\().1q, \rn\().1d, \rm\().1d - .else - pmull2 \rd\().1q, \rn\().2d, \rm\().2d - .endif - .endm - - .macro crc_t10dif_pmull, p - frame_push 4, 128 - - mov init_crc, w0 - mov buf, x1 - mov len, x2 - - __pmull_init_\p - - // For sizes less than 256 bytes, we can't fold 128 bytes at a time. - cmp len, #256 - b.lt .Lless_than_256_bytes_\@ - - adr_l fold_consts_ptr, .Lfold_across_128_bytes_consts - - // Load the first 128 data bytes. Byte swapping is necessary to make - // the bit order match the polynomial coefficient order. - ldp q0, q1, [buf] - ldp q2, q3, [buf, #0x20] - ldp q4, q5, [buf, #0x40] - ldp q6, q7, [buf, #0x60] - add buf, buf, #0x80 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( rev64 v1.16b, v1.16b ) -CPU_LE( rev64 v2.16b, v2.16b ) -CPU_LE( rev64 v3.16b, v3.16b ) -CPU_LE( rev64 v4.16b, v4.16b ) -CPU_LE( rev64 v5.16b, v5.16b ) -CPU_LE( rev64 v6.16b, v6.16b ) -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) -CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) -CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 ) -CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 ) -CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 ) -CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 ) -CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) - - // XOR the first 16 data *bits* with the initial CRC value. - movi v8.16b, #0 - mov v8.h[7], init_crc - eor v0.16b, v0.16b, v8.16b - - // Load the constants for folding across 128 bytes. 
- ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_pre_\p fold_consts - - // Subtract 128 for the 128 data bytes just consumed. Subtract another - // 128 to simplify the termination condition of the following loop. - sub len, len, #256 - - // While >= 128 data bytes remain (not counting v0-v7), fold the 128 - // bytes v0-v7 into them, storing the result back into v0-v7. -.Lfold_128_bytes_loop_\@: - fold_32_bytes \p, v0, v1 - fold_32_bytes \p, v2, v3 - fold_32_bytes \p, v4, v5 - fold_32_bytes \p, v6, v7 - - subs len, len, #128 - b.lt .Lfold_128_bytes_loop_done_\@ - - if_will_cond_yield_neon - stp q0, q1, [sp, #.Lframe_local_offset] - stp q2, q3, [sp, #.Lframe_local_offset + 32] - stp q4, q5, [sp, #.Lframe_local_offset + 64] - stp q6, q7, [sp, #.Lframe_local_offset + 96] - do_cond_yield_neon - ldp q0, q1, [sp, #.Lframe_local_offset] - ldp q2, q3, [sp, #.Lframe_local_offset + 32] - ldp q4, q5, [sp, #.Lframe_local_offset + 64] - ldp q6, q7, [sp, #.Lframe_local_offset + 96] - ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_init_\p - __pmull_pre_\p fold_consts - endif_yield_neon - - b .Lfold_128_bytes_loop_\@ - -.Lfold_128_bytes_loop_done_\@: - - // Now fold the 112 bytes in v0-v6 into the 16 bytes in v7. - - // Fold across 64 bytes. - add fold_consts_ptr, fold_consts_ptr, #16 - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - fold_16_bytes \p, v0, v4 - fold_16_bytes \p, v1, v5 - fold_16_bytes \p, v2, v6 - fold_16_bytes \p, v3, v7, 1 - // Fold across 32 bytes. - fold_16_bytes \p, v4, v6 - fold_16_bytes \p, v5, v7, 1 - // Fold across 16 bytes. - fold_16_bytes \p, v6, v7 - - // Add 128 to get the correct number of data bytes remaining in 0...127 - // (not counting v7), following the previous extra subtraction by 128. - // Then subtract 16 to simplify the termination condition of the - // following loop. - adds len, len, #(128-16) - - // While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7 - // into them, storing the result back into v7. - b.lt .Lfold_16_bytes_loop_done_\@ -.Lfold_16_bytes_loop_\@: - __pmull_\p v8, v7, fold_consts - __pmull_\p v7, v7, fold_consts, 2 - eor v7.16b, v7.16b, v8.16b - ldr q0, [buf], #16 -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) - eor v7.16b, v7.16b, v0.16b - subs len, len, #16 - b.ge .Lfold_16_bytes_loop_\@ - -.Lfold_16_bytes_loop_done_\@: - // Add 16 to get the correct number of data bytes remaining in 0...15 - // (not counting v7), following the previous extra subtraction by 16. - adds len, len, #16 - b.eq .Lreduce_final_16_bytes_\@ - -.Lhandle_partial_segment_\@: - // Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first - // 16 bytes are in v7 and the rest are the remaining data in 'buf'. To - // do this without needing a fold constant for each possible 'len', - // redivide the bytes into a first chunk of 'len' bytes and a second - // chunk of 16 bytes, then fold the first chunk into the second. - - // v0 = last 16 original data bytes - add buf, buf, len - ldr q0, [buf, #-16] -CPU_LE( rev64 v0.16b, v0.16b ) -CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) - - // v1 = high order part of second chunk: v7 left-shifted by 'len' bytes. - adr_l x4, .Lbyteshift_table + 16 - sub x4, x4, len - ld1 {v2.16b}, [x4] - tbl v1.16b, {v7.16b}, v2.16b - - // v3 = first chunk: v7 right-shifted by '16-len' bytes. - movi v3.16b, #0x80 - eor v2.16b, v2.16b, v3.16b - tbl v3.16b, {v7.16b}, v2.16b - - // Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes. 
- sshr v2.16b, v2.16b, #7 - - // v2 = second chunk: 'len' bytes from v0 (low-order bytes), - // then '16-len' bytes from v1 (high-order bytes). - bsl v2.16b, v1.16b, v0.16b - - // Fold the first chunk into the second chunk, storing the result in v7. - __pmull_\p v0, v3, fold_consts - __pmull_\p v7, v3, fold_consts, 2 - eor v7.16b, v7.16b, v0.16b - eor v7.16b, v7.16b, v2.16b - -.Lreduce_final_16_bytes_\@: - // Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC. - - movi v2.16b, #0 // init zero register - - // Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'. - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - - // Fold the high 64 bits into the low 64 bits, while also multiplying by - // x^64. This produces a 128-bit value congruent to x^64 * M(x) and - // whose low 48 bits are 0. - ext v0.16b, v2.16b, v7.16b, #8 - __pmull_\p v7, v7, fold_consts, 2 // high bits * x^48 * (x^80 mod G(x)) - eor v0.16b, v0.16b, v7.16b // + low bits * x^64 - - // Fold the high 32 bits into the low 96 bits. This produces a 96-bit - // value congruent to x^64 * M(x) and whose low 48 bits are 0. - ext v1.16b, v0.16b, v2.16b, #12 // extract high 32 bits - mov v0.s[3], v2.s[0] // zero high 32 bits - __pmull_\p v1, v1, fold_consts // high 32 bits * x^48 * (x^48 mod G(x)) - eor v0.16b, v0.16b, v1.16b // + low bits - - // Load G(x) and floor(x^48 / G(x)). - ld1 {fold_consts.2d}, [fold_consts_ptr] - __pmull_pre_\p fold_consts - - // Use Barrett reduction to compute the final CRC value. - __pmull_\p v1, v0, fold_consts, 2 // high 32 bits * floor(x^48 / G(x)) - ushr v1.2d, v1.2d, #32 // /= x^32 - __pmull_\p v1, v1, fold_consts // *= G(x) - ushr v0.2d, v0.2d, #48 - eor v0.16b, v0.16b, v1.16b // + low 16 nonzero bits - // Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0. - - umov w0, v0.h[0] - frame_pop - ret - -.Lless_than_256_bytes_\@: - // Checksumming a buffer of length 16...255 bytes - - adr_l fold_consts_ptr, .Lfold_across_16_bytes_consts - - // Load the first 16 data bytes. - ldr q7, [buf], #0x10 -CPU_LE( rev64 v7.16b, v7.16b ) -CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) - - // XOR the first 16 data *bits* with the initial CRC value. - movi v0.16b, #0 - mov v0.h[7], init_crc - eor v7.16b, v7.16b, v0.16b - - // Load the fold-across-16-bytes constants. - ld1 {fold_consts.2d}, [fold_consts_ptr], #16 - __pmull_pre_\p fold_consts - - cmp len, #16 - b.eq .Lreduce_final_16_bytes_\@ // len == 16 - subs len, len, #32 - b.ge .Lfold_16_bytes_loop_\@ // 32 <= len <= 255 - add len, len, #16 - b .Lhandle_partial_segment_\@ // 17 <= len <= 31 - .endm - -// -// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. -// -ENTRY(crc_t10dif_pmull_p8) - crc_t10dif_pmull p8 -ENDPROC(crc_t10dif_pmull_p8) - - .align 5 -// -// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); -// -// Assumes len >= 16. 
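The fold and Barrett steps above are all derived from one 16-bit polynomial, G(x) = 0x18bb7 (listed with the constants below). As a cross-check, here is a bit-serial C reference of CRC-T10DIF that the PMULL paths must agree with (a sketch, not the kernel's generic implementation):

    #include <stddef.h>
    #include <stdint.h>

    /* MSB-first CRC-T10DIF: polynomial 0x8bb7, no reflection, init 0. */
    static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf,
                                   size_t len)
    {
            for (size_t i = 0; i < len; i++) {
                    crc ^= (uint16_t)buf[i] << 8;   /* feed next byte */
                    for (int bit = 0; bit < 8; bit++)
                            crc = (crc & 0x8000)
                                    ? (uint16_t)(crc << 1) ^ 0x8bb7
                                    : (uint16_t)(crc << 1);
            }
            return crc;
    }

The same G(x) appears in the Barrett constants below, together with floor(x^48 / G(x)).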
-// -ENTRY(crc_t10dif_pmull_p64) - crc_t10dif_pmull p64 -ENDPROC(crc_t10dif_pmull_p64) - - .section ".rodata", "a" - .align 4 - -// Fold constants precomputed from the polynomial 0x18bb7 -// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 -.Lfold_across_128_bytes_consts: - .quad 0x0000000000006123 // x^(8*128) mod G(x) - .quad 0x0000000000002295 // x^(8*128+64) mod G(x) -// .Lfold_across_64_bytes_consts: - .quad 0x0000000000001069 // x^(4*128) mod G(x) - .quad 0x000000000000dd31 // x^(4*128+64) mod G(x) -// .Lfold_across_32_bytes_consts: - .quad 0x000000000000857d // x^(2*128) mod G(x) - .quad 0x0000000000007acc // x^(2*128+64) mod G(x) -.Lfold_across_16_bytes_consts: - .quad 0x000000000000a010 // x^(1*128) mod G(x) - .quad 0x0000000000001faa // x^(1*128+64) mod G(x) -// .Lfinal_fold_consts: - .quad 0x1368000000000000 // x^48 * (x^48 mod G(x)) - .quad 0x2d56000000000000 // x^48 * (x^80 mod G(x)) -// .Lbarrett_reduction_consts: - .quad 0x0000000000018bb7 // G(x) - .quad 0x00000001f65a57f8 // floor(x^48 / G(x)) - -// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - -// len] is the index vector to shift left by 'len' bytes, and is also {0x80, -// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes. -.Lbyteshift_table: - .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 - .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f - .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 - .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S deleted file mode 100644 index 410e8afcf5a7dfdd3690f8e74ea6a382ac1fa02f..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/ghash-ce-core.S +++ /dev/null @@ -1,575 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated GHASH implementation with ARMv8 PMULL instructions. - * - * Copyright (C) 2014 - 2018 Linaro Ltd. 
- */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - SHASH .req v0 - SHASH2 .req v1 - T1 .req v2 - T2 .req v3 - MASK .req v4 - XL .req v5 - XM .req v6 - XH .req v7 - IN1 .req v7 - - k00_16 .req v8 - k32_48 .req v9 - - t3 .req v10 - t4 .req v11 - t5 .req v12 - t6 .req v13 - t7 .req v14 - t8 .req v15 - t9 .req v16 - - perm1 .req v17 - perm2 .req v18 - perm3 .req v19 - - sh1 .req v20 - sh2 .req v21 - sh3 .req v22 - sh4 .req v23 - - ss1 .req v24 - ss2 .req v25 - ss3 .req v26 - ss4 .req v27 - - XL2 .req v8 - XM2 .req v9 - XH2 .req v10 - XL3 .req v11 - XM3 .req v12 - XH3 .req v13 - TT3 .req v14 - TT4 .req v15 - HH .req v16 - HH3 .req v17 - HH4 .req v18 - HH34 .req v19 - - .text - .arch armv8-a+crypto - - .macro __pmull_p64, rd, rn, rm - pmull \rd\().1q, \rn\().1d, \rm\().1d - .endm - - .macro __pmull2_p64, rd, rn, rm - pmull2 \rd\().1q, \rn\().2d, \rm\().2d - .endm - - .macro __pmull_p8, rq, ad, bd - ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1 - ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2 - ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3 - - __pmull_p8_\bd \rq, \ad - .endm - - .macro __pmull2_p8, rq, ad, bd - tbl t3.16b, {\ad\().16b}, perm1.16b // A1 - tbl t5.16b, {\ad\().16b}, perm2.16b // A2 - tbl t7.16b, {\ad\().16b}, perm3.16b // A3 - - __pmull2_p8_\bd \rq, \ad - .endm - - .macro __pmull_p8_SHASH, rq, ad - __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4 - .endm - - .macro __pmull_p8_SHASH2, rq, ad - __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4 - .endm - - .macro __pmull2_p8_SHASH, rq, ad - __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4 - .endm - - .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4 - pmull\t t3.8h, t3.\nb, \bd // F = A1*B - pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1 - pmull\t t5.8h, t5.\nb, \bd // H = A2*B - pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2 - pmull\t t7.8h, t7.\nb, \bd // J = A3*B - pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3 - pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4 - pmull\t \rq\().8h, \ad, \bd // D = A*B - - eor t3.16b, t3.16b, t4.16b // L = E + F - eor t5.16b, t5.16b, t6.16b // M = G + H - eor t7.16b, t7.16b, t8.16b // N = I + J - - uzp1 t4.2d, t3.2d, t5.2d - uzp2 t3.2d, t3.2d, t5.2d - uzp1 t6.2d, t7.2d, t9.2d - uzp2 t7.2d, t7.2d, t9.2d - - // t3 = (L) (P0 + P1) << 8 - // t5 = (M) (P2 + P3) << 16 - eor t4.16b, t4.16b, t3.16b - and t3.16b, t3.16b, k32_48.16b - - // t7 = (N) (P4 + P5) << 24 - // t9 = (K) (P6 + P7) << 32 - eor t6.16b, t6.16b, t7.16b - and t7.16b, t7.16b, k00_16.16b - - eor t4.16b, t4.16b, t3.16b - eor t6.16b, t6.16b, t7.16b - - zip2 t5.2d, t4.2d, t3.2d - zip1 t3.2d, t4.2d, t3.2d - zip2 t9.2d, t6.2d, t7.2d - zip1 t7.2d, t6.2d, t7.2d - - ext t3.16b, t3.16b, t3.16b, #15 - ext t5.16b, t5.16b, t5.16b, #14 - ext t7.16b, t7.16b, t7.16b, #13 - ext t9.16b, t9.16b, t9.16b, #12 - - eor t3.16b, t3.16b, t5.16b - eor t7.16b, t7.16b, t9.16b - eor \rq\().16b, \rq\().16b, t3.16b - eor \rq\().16b, \rq\().16b, t7.16b - .endm - - .macro __pmull_pre_p64 - add x8, x3, #16 - ld1 {HH.2d-HH4.2d}, [x8] - - trn1 SHASH2.2d, SHASH.2d, HH.2d - trn2 T1.2d, SHASH.2d, HH.2d - eor SHASH2.16b, SHASH2.16b, T1.16b - - trn1 HH34.2d, HH3.2d, HH4.2d - trn2 T1.2d, HH3.2d, HH4.2d - eor HH34.16b, HH34.16b, T1.16b - - movi MASK.16b, #0xe1 - shl MASK.2d, MASK.2d, #57 - .endm - - .macro __pmull_pre_p8 - ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 - eor SHASH2.16b, SHASH2.16b, SHASH.16b - - // k00_16 := 0x0000000000000000_000000000000ffff - // k32_48 := 0x00000000ffffffff_0000ffffffffffff - movi k32_48.2d, #0xffffffff - mov k32_48.h[2], k32_48.h[0] - ushr 
k00_16.2d, k32_48.2d, #32 - - // prepare the permutation vectors - mov_q x5, 0x080f0e0d0c0b0a09 - movi T1.8b, #8 - dup perm1.2d, x5 - eor perm1.16b, perm1.16b, T1.16b - ushr perm2.2d, perm1.2d, #8 - ushr perm3.2d, perm1.2d, #16 - ushr T1.2d, perm1.2d, #24 - sli perm2.2d, perm1.2d, #56 - sli perm3.2d, perm1.2d, #48 - sli T1.2d, perm1.2d, #40 - - // precompute loop invariants - tbl sh1.16b, {SHASH.16b}, perm1.16b - tbl sh2.16b, {SHASH.16b}, perm2.16b - tbl sh3.16b, {SHASH.16b}, perm3.16b - tbl sh4.16b, {SHASH.16b}, T1.16b - ext ss1.8b, SHASH2.8b, SHASH2.8b, #1 - ext ss2.8b, SHASH2.8b, SHASH2.8b, #2 - ext ss3.8b, SHASH2.8b, SHASH2.8b, #3 - ext ss4.8b, SHASH2.8b, SHASH2.8b, #4 - .endm - - // - // PMULL (64x64->128) based reduction for CPUs that can do - // it in a single instruction. - // - .macro __pmull_reduce_p64 - pmull T2.1q, XL.1d, MASK.1d - eor XM.16b, XM.16b, T1.16b - - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] - - eor XL.16b, XM.16b, T2.16b - ext T2.16b, XL.16b, XL.16b, #8 - pmull XL.1q, XL.1d, MASK.1d - .endm - - // - // Alternative reduction for CPUs that lack support for the - // 64x64->128 PMULL instruction - // - .macro __pmull_reduce_p8 - eor XM.16b, XM.16b, T1.16b - - mov XL.d[1], XM.d[0] - mov XH.d[0], XM.d[1] - - shl T1.2d, XL.2d, #57 - shl T2.2d, XL.2d, #62 - eor T2.16b, T2.16b, T1.16b - shl T1.2d, XL.2d, #63 - eor T2.16b, T2.16b, T1.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor T2.16b, T2.16b, T1.16b - - mov XL.d[1], T2.d[0] - mov XH.d[0], T2.d[1] - - ushr T2.2d, XL.2d, #1 - eor XH.16b, XH.16b, XL.16b - eor XL.16b, XL.16b, T2.16b - ushr T2.2d, T2.2d, #6 - ushr XL.2d, XL.2d, #1 - .endm - - .macro __pmull_ghash, pn - ld1 {SHASH.2d}, [x3] - ld1 {XL.2d}, [x1] - - __pmull_pre_\pn - - /* do the head block first, if supplied */ - cbz x4, 0f - ld1 {T1.2d}, [x4] - mov x4, xzr - b 3f - -0: .ifc \pn, p64 - tbnz w0, #0, 2f // skip until #blocks is a - tbnz w0, #1, 2f // round multiple of 4 - -1: ld1 {XM3.16b-TT4.16b}, [x2], #64 - - sub w0, w0, #4 - - rev64 T1.16b, XM3.16b - rev64 T2.16b, XH3.16b - rev64 TT4.16b, TT4.16b - rev64 TT3.16b, TT3.16b - - ext IN1.16b, TT4.16b, TT4.16b, #8 - ext XL3.16b, TT3.16b, TT3.16b, #8 - - eor TT4.16b, TT4.16b, IN1.16b - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 - pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0) - - eor TT3.16b, TT3.16b, XL3.16b - pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1 - pmull XL3.1q, HH.1d, XL3.1d // a0 * b0 - pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0) - - ext IN1.16b, T2.16b, T2.16b, #8 - eor XL2.16b, XL2.16b, XL3.16b - eor XH2.16b, XH2.16b, XH3.16b - eor XM2.16b, XM2.16b, XM3.16b - - eor T2.16b, T2.16b, IN1.16b - pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1 - pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0 - pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0) - - eor XL2.16b, XL2.16b, XL3.16b - eor XH2.16b, XH2.16b, XH3.16b - eor XM2.16b, XM2.16b, XM3.16b - - ext IN1.16b, T1.16b, T1.16b, #8 - ext TT3.16b, XL.16b, XL.16b, #8 - eor XL.16b, XL.16b, IN1.16b - eor T1.16b, T1.16b, TT3.16b - - pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1 - eor T1.16b, T1.16b, XL.16b - pmull XL.1q, HH4.1d, XL.1d // a0 * b0 - pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0) - - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b - eor XM.16b, XM.16b, XM2.16b - - eor T2.16b, XL.16b, XH.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor XM.16b, XM.16b, T2.16b - - __pmull_reduce_p64 - - eor T2.16b, T2.16b, XH.16b - eor XL.16b, XL.16b, T2.16b - - cbz w0, 5f - b 1b - .endif - -2: ld1 {T1.2d}, [x2], #16 - sub w0, w0, #1 - 
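The single-block multiply at label 3 below carries the same comments as the four-block path above: a1 * b1, a0 * b0 and (a1 + a0)(b1 + b0). That is a Karatsuba split of one 128x128 carryless multiply into three 64x64 PMULLs; roughly, in C, with clmul64 as a hypothetical stand-in for PMULL:

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } u128;

    /* 64x64 -> 128 carryless multiply (what PMULL computes); assumed
     * to be provided elsewhere, e.g. via intrinsics. */
    u128 clmul64(uint64_t a, uint64_t b);

    /*
     * 256-bit carryless product of a and b as limbs r[3]:r[2]:r[1]:r[0],
     * using three multiplies instead of four. GHASH then reduces this
     * mod x^128 + x^7 + x^2 + x + 1, the polynomial encoded by the
     * 0xe1 << 57 MASK above.
     */
    static void clmul128_karatsuba(uint64_t r[4], u128 a, u128 b)
    {
            u128 hi  = clmul64(a.hi, b.hi);               /* a1 * b1 */
            u128 lo  = clmul64(a.lo, b.lo);               /* a0 * b0 */
            u128 mid = clmul64(a.hi ^ a.lo, b.hi ^ b.lo);

            /* (a1 + a0)(b1 + b0) + a1*b1 + a0*b0 == a1*b0 + a0*b1 */
            mid.lo ^= hi.lo ^ lo.lo;
            mid.hi ^= hi.hi ^ lo.hi;

            r[0] = lo.lo;
            r[1] = lo.hi ^ mid.lo;
            r[2] = hi.lo ^ mid.hi;
            r[3] = hi.hi;
    }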
-3: /* multiply XL by SHASH in GF(2^128) */ -CPU_LE( rev64 T1.16b, T1.16b ) - - ext T2.16b, XL.16b, XL.16b, #8 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b - eor XL.16b, XL.16b, IN1.16b - - __pmull2_\pn XH, XL, SHASH // a1 * b1 - eor T1.16b, T1.16b, XL.16b - __pmull_\pn XL, XL, SHASH // a0 * b0 - __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0) - -4: eor T2.16b, XL.16b, XH.16b - ext T1.16b, XL.16b, XH.16b, #8 - eor XM.16b, XM.16b, T2.16b - - __pmull_reduce_\pn - - eor T2.16b, T2.16b, XH.16b - eor XL.16b, XL.16b, T2.16b - - cbnz w0, 0b - -5: st1 {XL.2d}, [x1] - ret - .endm - - /* - * void pmull_ghash_update(int blocks, u64 dg[], const char *src, - * struct ghash_key const *k, const char *head) - */ -ENTRY(pmull_ghash_update_p64) - __pmull_ghash p64 -ENDPROC(pmull_ghash_update_p64) - -ENTRY(pmull_ghash_update_p8) - __pmull_ghash p8 -ENDPROC(pmull_ghash_update_p8) - - KS0 .req v12 - KS1 .req v13 - INP0 .req v14 - INP1 .req v15 - - .macro load_round_keys, rounds, rk - cmp \rounds, #12 - blo 2222f /* 128 bits */ - beq 1111f /* 192 bits */ - ld1 {v17.4s-v18.4s}, [\rk], #32 -1111: ld1 {v19.4s-v20.4s}, [\rk], #32 -2222: ld1 {v21.4s-v24.4s}, [\rk], #64 - ld1 {v25.4s-v28.4s}, [\rk], #64 - ld1 {v29.4s-v31.4s}, [\rk] - .endm - - .macro enc_round, state, key - aese \state\().16b, \key\().16b - aesmc \state\().16b, \state\().16b - .endm - - .macro enc_block, state, rounds - cmp \rounds, #12 - b.lo 2222f /* 128 bits */ - b.eq 1111f /* 192 bits */ - enc_round \state, v17 - enc_round \state, v18 -1111: enc_round \state, v19 - enc_round \state, v20 -2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 - enc_round \state, \key - .endr - aese \state\().16b, v30.16b - eor \state\().16b, \state\().16b, v31.16b - .endm - - .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4], #16 - ld1 {HH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter - - movi MASK.16b, #0xe1 - trn1 SHASH2.2d, SHASH.2d, HH.2d - trn2 T1.2d, SHASH.2d, HH.2d -CPU_LE( rev x8, x8 ) - shl MASK.2d, MASK.2d, #57 - eor SHASH2.16b, SHASH2.16b, T1.16b - - .if \enc == 1 - ldr x10, [sp] - ld1 {KS0.16b-KS1.16b}, [x10] - .endif - - cbnz x6, 4f - -0: ld1 {INP0.16b-INP1.16b}, [x3], #32 - - rev x9, x8 - add x11, x8, #1 - add x8, x8, #2 - - .if \enc == 1 - eor INP0.16b, INP0.16b, KS0.16b // encrypt input - eor INP1.16b, INP1.16b, KS1.16b - .endif - - ld1 {KS0.8b}, [x5] // load upper counter - rev x11, x11 - sub w0, w0, #2 - mov KS1.8b, KS0.8b - ins KS0.d[1], x9 // set lower counter - ins KS1.d[1], x11 - - rev64 T1.16b, INP1.16b - - cmp w7, #12 - b.ge 2f // AES-192/256? 
- -1: enc_round KS0, v21 - ext IN1.16b, T1.16b, T1.16b, #8 - - enc_round KS1, v21 - pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1 - - enc_round KS0, v22 - eor T1.16b, T1.16b, IN1.16b - - enc_round KS1, v22 - pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0 - - enc_round KS0, v23 - pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) - - enc_round KS1, v23 - rev64 T1.16b, INP0.16b - ext T2.16b, XL.16b, XL.16b, #8 - - enc_round KS0, v24 - ext IN1.16b, T1.16b, T1.16b, #8 - eor T1.16b, T1.16b, T2.16b - - enc_round KS1, v24 - eor XL.16b, XL.16b, IN1.16b - - enc_round KS0, v25 - eor T1.16b, T1.16b, XL.16b - - enc_round KS1, v25 - pmull2 XH.1q, HH.2d, XL.2d // a1 * b1 - - enc_round KS0, v26 - pmull XL.1q, HH.1d, XL.1d // a0 * b0 - - enc_round KS1, v26 - pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0) - - enc_round KS0, v27 - eor XL.16b, XL.16b, XL2.16b - eor XH.16b, XH.16b, XH2.16b - - enc_round KS1, v27 - eor XM.16b, XM.16b, XM2.16b - ext T1.16b, XL.16b, XH.16b, #8 - - enc_round KS0, v28 - eor T2.16b, XL.16b, XH.16b - eor XM.16b, XM.16b, T1.16b - - enc_round KS1, v28 - eor XM.16b, XM.16b, T2.16b - - enc_round KS0, v29 - pmull T2.1q, XL.1d, MASK.1d - - enc_round KS1, v29 - mov XH.d[0], XM.d[1] - mov XM.d[1], XL.d[0] - - aese KS0.16b, v30.16b - eor XL.16b, XM.16b, T2.16b - - aese KS1.16b, v30.16b - ext T2.16b, XL.16b, XL.16b, #8 - - eor KS0.16b, KS0.16b, v31.16b - pmull XL.1q, XL.1d, MASK.1d - eor T2.16b, T2.16b, XH.16b - - eor KS1.16b, KS1.16b, v31.16b - eor XL.16b, XL.16b, T2.16b - - .if \enc == 0 - eor INP0.16b, INP0.16b, KS0.16b - eor INP1.16b, INP1.16b, KS1.16b - .endif - - st1 {INP0.16b-INP1.16b}, [x2], #32 - - cbnz w0, 0b - -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter - - .if \enc == 1 - st1 {KS0.16b-KS1.16b}, [x10] - .endif - - ret - -2: b.eq 3f // AES-192? 
- enc_round KS0, v17 - enc_round KS1, v17 - enc_round KS0, v18 - enc_round KS1, v18 -3: enc_round KS0, v19 - enc_round KS1, v19 - enc_round KS0, v20 - enc_round KS1, v20 - b 1b - -4: load_round_keys w7, x6 - b 0b - .endm - - /* - * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds, u8 ks[]) - */ -ENTRY(pmull_gcm_encrypt) - pmull_gcm_do_crypt 1 -ENDPROC(pmull_gcm_encrypt) - - /* - * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], - * struct ghash_key const *k, u8 ctr[], - * int rounds) - */ -ENTRY(pmull_gcm_decrypt) - pmull_gcm_do_crypt 0 -ENDPROC(pmull_gcm_decrypt) - - /* - * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds) - */ -ENTRY(pmull_gcm_encrypt_block) - cbz x2, 0f - load_round_keys w3, x2 -0: ld1 {v0.16b}, [x1] - enc_block v0, w3 - st1 {v0.16b}, [x0] - ret -ENDPROC(pmull_gcm_encrypt_block) diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S deleted file mode 100644 index e05570c38de7621658313ef401fe46e708847594..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/nh-neon-core.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * NH - ε-almost-universal hash function, ARM64 NEON accelerated version - * - * Copyright 2018 Google LLC - * - * Author: Eric Biggers <ebiggers@google.com> - */ - -#include <linux/linkage.h> - - KEY .req x0 - MESSAGE .req x1 - MESSAGE_LEN .req x2 - HASH .req x3 - - PASS0_SUMS .req v0 - PASS1_SUMS .req v1 - PASS2_SUMS .req v2 - PASS3_SUMS .req v3 - K0 .req v4 - K1 .req v5 - K2 .req v6 - K3 .req v7 - T0 .req v8 - T1 .req v9 - T2 .req v10 - T3 .req v11 - T4 .req v12 - T5 .req v13 - T6 .req v14 - T7 .req v15 - -.macro _nh_stride k0, k1, k2, k3 - - // Load next message stride - ld1 {T3.16b}, [MESSAGE], #16 - - // Load next key stride - ld1 {\k3\().4s}, [KEY], #16 - - // Add message words to key words - add T0.4s, T3.4s, \k0\().4s - add T1.4s, T3.4s, \k1\().4s - add T2.4s, T3.4s, \k2\().4s - add T3.4s, T3.4s, \k3\().4s - - // Multiply 32x32 => 64 and accumulate - mov T4.d[0], T0.d[1] - mov T5.d[0], T1.d[1] - mov T6.d[0], T2.d[1] - mov T7.d[0], T3.d[1] - umlal PASS0_SUMS.2d, T0.2s, T4.2s - umlal PASS1_SUMS.2d, T1.2s, T5.2s - umlal PASS2_SUMS.2d, T2.2s, T6.2s - umlal PASS3_SUMS.2d, T3.2s, T7.2s -.endm - -/* - * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) - * - * It's guaranteed that message_len % 16 == 0. 
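In scalar terms, each _nh_stride above adds one 16-byte message stride to a sliding key stride and accumulates two 32x32 -> 64 products per pass. A plain C sketch of that arithmetic (assuming the NH key layout used by NHPoly1305, with per-pass key offsets of four 32-bit words; message words are taken little-endian):

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t load_le32(const uint8_t *p)
    {
            return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
                   (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
    }

    /* Four NH passes over a message whose length is a multiple of 16,
     * as the comment above guarantees. sums[] holds one accumulator
     * per pass; the NEON code keeps each as two 64-bit lanes and only
     * pairs them with addp at the end. */
    static void nh_ref(const uint32_t *key, const uint8_t *message,
                       size_t message_len, uint64_t sums[4])
    {
            sums[0] = sums[1] = sums[2] = sums[3] = 0;

            for (; message_len >= 16;
                 message_len -= 16, message += 16, key += 4) {
                    uint32_t m0 = load_le32(message);
                    uint32_t m1 = load_le32(message + 4);
                    uint32_t m2 = load_le32(message + 8);
                    uint32_t m3 = load_le32(message + 12);

                    for (int p = 0; p < 4; p++) {
                            const uint32_t *k = key + 4 * p;

                            sums[p] += (uint64_t)(uint32_t)(m0 + k[0]) *
                                       (uint32_t)(m2 + k[2]);
                            sums[p] += (uint64_t)(uint32_t)(m1 + k[1]) *
                                       (uint32_t)(m3 + k[3]);
                    }
            }
    }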
- */ -ENTRY(nh_neon) - - ld1 {K0.4s,K1.4s}, [KEY], #32 - movi PASS0_SUMS.2d, #0 - movi PASS1_SUMS.2d, #0 - ld1 {K2.4s}, [KEY], #16 - movi PASS2_SUMS.2d, #0 - movi PASS3_SUMS.2d, #0 - - subs MESSAGE_LEN, MESSAGE_LEN, #64 - blt .Lloop4_done -.Lloop4: - _nh_stride K0, K1, K2, K3 - _nh_stride K1, K2, K3, K0 - _nh_stride K2, K3, K0, K1 - _nh_stride K3, K0, K1, K2 - subs MESSAGE_LEN, MESSAGE_LEN, #64 - bge .Lloop4 - -.Lloop4_done: - ands MESSAGE_LEN, MESSAGE_LEN, #63 - beq .Ldone - _nh_stride K0, K1, K2, K3 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K1, K2, K3, K0 - - subs MESSAGE_LEN, MESSAGE_LEN, #16 - beq .Ldone - _nh_stride K2, K3, K0, K1 - -.Ldone: - // Sum the accumulators for each pass, then store the sums to 'hash' - addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d - addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d - st1 {T0.16b,T1.16b}, [HASH] - ret -ENDPROC(nh_neon) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S deleted file mode 100644 index c2ce1f820706f3ac535ffab1809e327577d3dca1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha1-ce-core.S +++ /dev/null @@ -1,163 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions - * - * Copyright (C) 2014 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - k0 .req v0 - k1 .req v1 - k2 .req v2 - k3 .req v3 - - t0 .req v4 - t1 .req v5 - - dga .req q6 - dgav .req v6 - dgb .req s7 - dgbv .req v7 - - dg0q .req q12 - dg0s .req s12 - dg0v .req v12 - dg1s .req s13 - dg1v .req v13 - dg2s .req s14 - - .macro add_only, op, ev, rc, s0, dg1 - .ifc \ev, ev - add t1.4s, v\s0\().4s, \rc\().4s - sha1h dg2s, dg0s - .ifnb \dg1 - sha1\op dg0q, \dg1, t0.4s - .else - sha1\op dg0q, dg1s, t0.4s - .endif - .else - .ifnb \s0 - add t0.4s, v\s0\().4s, \rc\().4s - .endif - sha1h dg1s, dg0s - sha1\op dg0q, dg2s, t1.4s - .endif - .endm - - .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 - sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s - add_only \op, \ev, \rc, \s1, \dg1 - sha1su1 v\s0\().4s, v\s3\().4s - .endm - - .macro loadrc, k, val, tmp - movz \tmp, :abs_g0_nc:\val - movk \tmp, :abs_g1:\val - dup \k, \tmp - .endm - - /* - * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, - * int blocks) - */ -ENTRY(sha1_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load round constants */ -0: loadrc k0.4s, 0x5a827999, w6 - loadrc k1.4s, 0x6ed9eba1, w6 - loadrc k2.4s, 0x8f1bbcdc, w6 - loadrc k3.4s, 0xca62c1d6, w6 - - /* load state */ - ld1 {dgav.4s}, [x19] - ldr dgb, [x19, #16] - - /* load sha1_ce_state::finalize */ - ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x19, x4] - - /* load input */ -1: ld1 {v8.4s-v11.4s}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev32 v8.16b, v8.16b ) -CPU_LE( rev32 v9.16b, v9.16b ) -CPU_LE( rev32 v10.16b, v10.16b ) -CPU_LE( rev32 v11.16b, v11.16b ) - -2: add t0.4s, v8.4s, k0.4s - mov dg0v.16b, dgav.16b - - add_update c, ev, k0, 8, 9, 10, 11, dgb - add_update c, od, k0, 9, 10, 11, 8 - add_update c, ev, k0, 10, 11, 8, 9 - add_update c, od, k0, 11, 8, 9, 10 - add_update c, ev, k1, 8, 9, 10, 11 - - add_update p, od, k1, 9, 10, 11, 8 - add_update p, ev, k1, 10, 11, 8, 9 - add_update p, od, k1, 11, 8, 9, 10 - add_update p, ev, k1, 8, 9, 10, 11 - add_update p, od, k2, 9, 10, 11, 8 - - add_update m, ev, k2, 10, 11, 8, 9 - add_update m, od, k2, 11, 8, 9, 10 - add_update m, ev, k2, 8, 9, 10, 11 - add_update m, od, k2, 9, 10, 11, 8 - add_update m, ev, k3, 10, 11, 8, 
9 - - add_update p, od, k3, 11, 8, 9, 10 - add_only p, ev, k3, 9 - add_only p, od, k3, 10 - add_only p, ev, k3, 11 - add_only p, od - - /* update state */ - add dgbv.2s, dgbv.2s, dg1v.2s - add dgav.4s, dgav.4s, dg0v.4s - - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -3: cbz x4, 4f - ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x19, x4] - movi v9.2d, #0 - mov x8, #0x80000000 - movi v10.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d8, x8 - mov x4, #0 - mov v11.d[0], xzr - mov v11.d[1], x7 - b 2b - - /* store new state */ -4: st1 {dgav.4s}, [x19] - str dgb, [x19, #16] - frame_pop - ret -ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S deleted file mode 100644 index 6f728a41900937d48326f7a26caea3d58d0ff5e1..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha2-ce-core.S +++ /dev/null @@ -1,169 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions - * - * Copyright (C) 2014 Linaro Ltd - */ - -#include -#include - - .text - .arch armv8-a+crypto - - dga .req q20 - dgav .req v20 - dgb .req q21 - dgbv .req v21 - - t0 .req v22 - t1 .req v23 - - dg0q .req q24 - dg0v .req v24 - dg1q .req q25 - dg1v .req v25 - dg2q .req q26 - dg2v .req v26 - - .macro add_only, ev, rc, s0 - mov dg2v.16b, dg0v.16b - .ifeq \ev - add t1.4s, v\s0\().4s, \rc\().4s - sha256h dg0q, dg1q, t0.4s - sha256h2 dg1q, dg2q, t0.4s - .else - .ifnb \s0 - add t0.4s, v\s0\().4s, \rc\().4s - .endif - sha256h dg0q, dg1q, t1.4s - sha256h2 dg1q, dg2q, t1.4s - .endif - .endm - - .macro add_update, ev, rc, s0, s1, s2, s3 - sha256su0 v\s0\().4s, v\s1\().4s - add_only \ev, \rc, \s1 - sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s - .endm - - /* - * The SHA-256 round constants - */ - .section ".rodata", "a" - .align 4 -.Lsha2_rcon: - .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 - .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 - .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 - .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 - .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc - .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da - .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 - .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 - .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 - .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 - .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 - .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 - .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 - .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 - .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 - .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - - /* - * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sha2_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load round constants */ -0: adr_l x8, .Lsha2_rcon - ld1 { v0.4s- v3.4s}, [x8], #64 - ld1 { v4.4s- v7.4s}, [x8], #64 - ld1 { v8.4s-v11.4s}, [x8], #64 - ld1 {v12.4s-v15.4s}, [x8] - - /* load state */ - ld1 {dgav.4s, dgbv.4s}, [x19] - - /* load sha256_ce_state::finalize */ - ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr 
w4, [x19, x4] - - /* load input */ -1: ld1 {v16.4s-v19.4s}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev32 v16.16b, v16.16b ) -CPU_LE( rev32 v17.16b, v17.16b ) -CPU_LE( rev32 v18.16b, v18.16b ) -CPU_LE( rev32 v19.16b, v19.16b ) - -2: add t0.4s, v16.4s, v0.4s - mov dg0v.16b, dgav.16b - mov dg1v.16b, dgbv.16b - - add_update 0, v1, 16, 17, 18, 19 - add_update 1, v2, 17, 18, 19, 16 - add_update 0, v3, 18, 19, 16, 17 - add_update 1, v4, 19, 16, 17, 18 - - add_update 0, v5, 16, 17, 18, 19 - add_update 1, v6, 17, 18, 19, 16 - add_update 0, v7, 18, 19, 16, 17 - add_update 1, v8, 19, 16, 17, 18 - - add_update 0, v9, 16, 17, 18, 19 - add_update 1, v10, 17, 18, 19, 16 - add_update 0, v11, 18, 19, 16, 17 - add_update 1, v12, 19, 16, 17, 18 - - add_only 0, v13, 17 - add_only 1, v14, 18 - add_only 0, v15, 19 - add_only 1 - - /* update state */ - add dgav.4s, dgav.4s, dg0v.4s - add dgbv.4s, dgbv.4s, dg1v.4s - - /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {dgav.4s, dgbv.4s}, [x19] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* - * Final block: add padding and total bit count. - * Skip if the input size was not a round multiple of the block size, - * the padding is handled by the C code in that case. - */ -3: cbz x4, 4f - ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x19, x4] - movi v17.2d, #0 - mov x8, #0x80000000 - movi v18.2d, #0 - ror x7, x4, #29 // ror(lsl(x4, 3), 32) - fmov d16, x8 - mov x4, #0 - mov v19.d[0], xzr - mov v19.d[1], x7 - b 2b - - /* store new state */ -4: st1 {dgav.4s, dgbv.4s}, [x19] - frame_pop - ret -ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S deleted file mode 100644 index a7d587fa54f6c64836d6eba75f2a1ec6228b12c3..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha3-ce-core.S +++ /dev/null @@ -1,233 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
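The SHA-3 code below predates assembler mnemonics for the ARMv8.2 instructions, so each one is hand-assembled with .inst: a fixed base opcode OR-ed with the Vd/Vn/Vm (and immediate) fields. In C terms, mirroring the macros in this file:

    #include <stdint.h>

    /* Instruction words built exactly as the rax1/xar macros below do;
     * register arguments are NEON register numbers 0-31. */
    static uint32_t rax1_word(uint32_t vd, uint32_t vn, uint32_t vm)
    {
            return 0xce608c00u | vd | (vn << 5) | (vm << 16);
    }

    static uint32_t xar_word(uint32_t vd, uint32_t vn, uint32_t vm,
                             uint32_t imm6)
    {
            return 0xce800000u | vd | (vn << 5) | (imm6 << 10) | (vm << 16);
    }

xar is an XOR-and-rotate-right, which is why the rho rotations in the rounds below are written as (64 - n): rotating right by 64 - n is the same as rotating left by n.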
- */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 - .set .Lv\b\().2d, \b - .set .Lv\b\().16b, \b - .endr - - /* - * ARMv8.2 Crypto Extensions instructions - */ - .macro eor3, rd, rn, rm, ra - .inst 0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro rax1, rd, rn, rm - .inst 0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro bcax, rd, rn, rm, ra - .inst 0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro xar, rd, rn, rm, imm6 - .inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16) - .endm - - /* - * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size) - */ - .text -ENTRY(sha3_ce_transform) - frame_push 4 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - -0: /* load state */ - add x8, x19, #32 - ld1 { v0.1d- v3.1d}, [x19] - ld1 { v4.1d- v7.1d}, [x8], #32 - ld1 { v8.1d-v11.1d}, [x8], #32 - ld1 {v12.1d-v15.1d}, [x8], #32 - ld1 {v16.1d-v19.1d}, [x8], #32 - ld1 {v20.1d-v23.1d}, [x8], #32 - ld1 {v24.1d}, [x8] - -1: sub w21, w21, #1 - mov w8, #24 - adr_l x9, .Lsha3_rcon - - /* load input */ - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v31.8b}, [x20], #24 - eor v0.8b, v0.8b, v25.8b - eor v1.8b, v1.8b, v26.8b - eor v2.8b, v2.8b, v27.8b - eor v3.8b, v3.8b, v28.8b - eor v4.8b, v4.8b, v29.8b - eor v5.8b, v5.8b, v30.8b - eor v6.8b, v6.8b, v31.8b - - tbnz x22, #6, 3f // SHA3-512 - - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b-v30.8b}, [x20], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - eor v9.8b, v9.8b, v27.8b - eor v10.8b, v10.8b, v28.8b - eor v11.8b, v11.8b, v29.8b - eor v12.8b, v12.8b, v30.8b - - tbnz x22, #4, 2f // SHA3-384 or SHA3-224 - - // SHA3-256 - ld1 {v25.8b-v28.8b}, [x20], #32 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - b 4f - -2: tbz x22, #2, 4f // bit 2 cleared? 
SHA-384 - - // SHA3-224 - ld1 {v25.8b-v28.8b}, [x20], #32 - ld1 {v29.8b}, [x20], #8 - eor v13.8b, v13.8b, v25.8b - eor v14.8b, v14.8b, v26.8b - eor v15.8b, v15.8b, v27.8b - eor v16.8b, v16.8b, v28.8b - eor v17.8b, v17.8b, v29.8b - b 4f - - // SHA3-512 -3: ld1 {v25.8b-v26.8b}, [x20], #16 - eor v7.8b, v7.8b, v25.8b - eor v8.8b, v8.8b, v26.8b - -4: sub w8, w8, #1 - - eor3 v29.16b, v4.16b, v9.16b, v14.16b - eor3 v26.16b, v1.16b, v6.16b, v11.16b - eor3 v28.16b, v3.16b, v8.16b, v13.16b - eor3 v25.16b, v0.16b, v5.16b, v10.16b - eor3 v27.16b, v2.16b, v7.16b, v12.16b - eor3 v29.16b, v29.16b, v19.16b, v24.16b - eor3 v26.16b, v26.16b, v16.16b, v21.16b - eor3 v28.16b, v28.16b, v18.16b, v23.16b - eor3 v25.16b, v25.16b, v15.16b, v20.16b - eor3 v27.16b, v27.16b, v17.16b, v22.16b - - rax1 v30.2d, v29.2d, v26.2d // bc[0] - rax1 v26.2d, v26.2d, v28.2d // bc[2] - rax1 v28.2d, v28.2d, v25.2d // bc[4] - rax1 v25.2d, v25.2d, v27.2d // bc[1] - rax1 v27.2d, v27.2d, v29.2d // bc[3] - - eor v0.16b, v0.16b, v30.16b - xar v29.2d, v1.2d, v25.2d, (64 - 1) - xar v1.2d, v6.2d, v25.2d, (64 - 44) - xar v6.2d, v9.2d, v28.2d, (64 - 20) - xar v9.2d, v22.2d, v26.2d, (64 - 61) - xar v22.2d, v14.2d, v28.2d, (64 - 39) - xar v14.2d, v20.2d, v30.2d, (64 - 18) - xar v31.2d, v2.2d, v26.2d, (64 - 62) - xar v2.2d, v12.2d, v26.2d, (64 - 43) - xar v12.2d, v13.2d, v27.2d, (64 - 25) - xar v13.2d, v19.2d, v28.2d, (64 - 8) - xar v19.2d, v23.2d, v27.2d, (64 - 56) - xar v23.2d, v15.2d, v30.2d, (64 - 41) - xar v15.2d, v4.2d, v28.2d, (64 - 27) - xar v28.2d, v24.2d, v28.2d, (64 - 14) - xar v24.2d, v21.2d, v25.2d, (64 - 2) - xar v8.2d, v8.2d, v27.2d, (64 - 55) - xar v4.2d, v16.2d, v25.2d, (64 - 45) - xar v16.2d, v5.2d, v30.2d, (64 - 36) - xar v5.2d, v3.2d, v27.2d, (64 - 28) - xar v27.2d, v18.2d, v27.2d, (64 - 21) - xar v3.2d, v17.2d, v26.2d, (64 - 15) - xar v25.2d, v11.2d, v25.2d, (64 - 10) - xar v26.2d, v7.2d, v26.2d, (64 - 6) - xar v30.2d, v10.2d, v30.2d, (64 - 3) - - bcax v20.16b, v31.16b, v22.16b, v8.16b - bcax v21.16b, v8.16b, v23.16b, v22.16b - bcax v22.16b, v22.16b, v24.16b, v23.16b - bcax v23.16b, v23.16b, v31.16b, v24.16b - bcax v24.16b, v24.16b, v8.16b, v31.16b - - ld1r {v31.2d}, [x9], #8 - - bcax v17.16b, v25.16b, v19.16b, v3.16b - bcax v18.16b, v3.16b, v15.16b, v19.16b - bcax v19.16b, v19.16b, v16.16b, v15.16b - bcax v15.16b, v15.16b, v25.16b, v16.16b - bcax v16.16b, v16.16b, v3.16b, v25.16b - - bcax v10.16b, v29.16b, v12.16b, v26.16b - bcax v11.16b, v26.16b, v13.16b, v12.16b - bcax v12.16b, v12.16b, v14.16b, v13.16b - bcax v13.16b, v13.16b, v29.16b, v14.16b - bcax v14.16b, v14.16b, v26.16b, v29.16b - - bcax v7.16b, v30.16b, v9.16b, v4.16b - bcax v8.16b, v4.16b, v5.16b, v9.16b - bcax v9.16b, v9.16b, v6.16b, v5.16b - bcax v5.16b, v5.16b, v30.16b, v6.16b - bcax v6.16b, v6.16b, v4.16b, v30.16b - - bcax v3.16b, v27.16b, v0.16b, v28.16b - bcax v4.16b, v28.16b, v1.16b, v0.16b - bcax v0.16b, v0.16b, v2.16b, v1.16b - bcax v1.16b, v1.16b, v27.16b, v2.16b - bcax v2.16b, v2.16b, v28.16b, v27.16b - - eor v0.16b, v0.16b, v31.16b - - cbnz w8, 4b - cbz w21, 5f - - if_will_cond_yield_neon - add x8, x19, #32 - st1 { v0.1d- v3.1d}, [x19] - st1 { v4.1d- v7.1d}, [x8], #32 - st1 { v8.1d-v11.1d}, [x8], #32 - st1 {v12.1d-v15.1d}, [x8], #32 - st1 {v16.1d-v19.1d}, [x8], #32 - st1 {v20.1d-v23.1d}, [x8], #32 - st1 {v24.1d}, [x8] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* save state */ -5: st1 { v0.1d- v3.1d}, [x19], #32 - st1 { v4.1d- v7.1d}, [x19], #32 - st1 { v8.1d-v11.1d}, [x19], #32 - st1 {v12.1d-v15.1d}, [x19], #32 - st1 {v16.1d-v19.1d}, 
[x19], #32 - st1 {v20.1d-v23.1d}, [x19], #32 - st1 {v24.1d}, [x19] - frame_pop - ret -ENDPROC(sha3_ce_transform) - - .section ".rodata", "a" - .align 8 -.Lsha3_rcon: - .quad 0x0000000000000001, 0x0000000000008082, 0x800000000000808a - .quad 0x8000000080008000, 0x000000000000808b, 0x0000000080000001 - .quad 0x8000000080008081, 0x8000000000008009, 0x000000000000008a - .quad 0x0000000000000088, 0x0000000080008009, 0x000000008000000a - .quad 0x000000008000808b, 0x800000000000008b, 0x8000000000008089 - .quad 0x8000000000008003, 0x8000000000008002, 0x8000000000000080 - .quad 0x000000000000800a, 0x800000008000000a, 0x8000000080008081 - .quad 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S deleted file mode 100644 index ce65e3abe4f2e5964cc2ba0537674d2d91f286fb..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sha512-ce-core.S +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - - .irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 - .set .Lq\b, \b - .set .Lv\b\().2d, \b - .endr - - .macro sha512h, rd, rn, rm - .inst 0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sha512h2, rd, rn, rm - .inst 0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sha512su0, rd, rn - .inst 0xcec08000 | .L\rd | (.L\rn << 5) - .endm - - .macro sha512su1, rd, rn, rm - .inst 0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - /* - * The SHA-512 round constants - */ - .section ".rodata", "a" - .align 4 -.Lsha512_rcon: - .quad 0x428a2f98d728ae22, 0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538, 0x59f111f1b605d019 - .quad 0x923f82a4af194f9b, 0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242, 0x12835b0145706fbe - .quad 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f, 0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235, 0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2, 0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275, 0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5 - .quad 0x983e5152ee66dfab, 0xa831c66d2db43210 - .quad 0xb00327c898fb213f, 0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2, 0xd5a79147930aa725 - .quad 0x06ca6351e003826f, 0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc, 0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df - .quad 0x650a73548baf63de, 0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6, 0x92722c851482353b - .quad 0xa2bfe8a14cf10364, 0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791, 0xc76c51a30654be30 - .quad 0xd192e819d6ef5218, 0xd69906245565a910 - .quad 0xf40e35855771202a, 0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8, 0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc, 0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72, 0x8cc702081a6439ec - .quad 0x90befffa23631e28, 0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915, 0xc67178f2e372532b - .quad 0xca273eceea26619c, 0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178 - .quad 
0x06f067aa72176fba, 0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae, 0x1b710b35131c471b - .quad 0x28db77f523047d84, 0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 - - .macro dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4 - .ifnb \rc1 - ld1 {v\rc1\().2d}, [x4], #16 - .endif - add v5.2d, v\rc0\().2d, v\in0\().2d - ext v6.16b, v\i2\().16b, v\i3\().16b, #8 - ext v5.16b, v5.16b, v5.16b, #8 - ext v7.16b, v\i1\().16b, v\i2\().16b, #8 - add v\i3\().2d, v\i3\().2d, v5.2d - .ifnb \in1 - ext v5.16b, v\in3\().16b, v\in4\().16b, #8 - sha512su0 v\in0\().2d, v\in1\().2d - .endif - sha512h q\i3, q6, v7.2d - .ifnb \in1 - sha512su1 v\in0\().2d, v\in2\().2d, v5.2d - .endif - add v\i4\().2d, v\i1\().2d, v\i3\().2d - sha512h2 q\i3, q\i1, v\i0\().2d - .endm - - /* - * void sha512_ce_transform(struct sha512_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sha512_ce_transform) - frame_push 3 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - - /* load state */ -0: ld1 {v8.2d-v11.2d}, [x19] - - /* load first 4 round constants */ - adr_l x3, .Lsha512_rcon - ld1 {v20.2d-v23.2d}, [x3], #64 - - /* load input */ -1: ld1 {v12.2d-v15.2d}, [x20], #64 - ld1 {v16.2d-v19.2d}, [x20], #64 - sub w21, w21, #1 - -CPU_LE( rev64 v12.16b, v12.16b ) -CPU_LE( rev64 v13.16b, v13.16b ) -CPU_LE( rev64 v14.16b, v14.16b ) -CPU_LE( rev64 v15.16b, v15.16b ) -CPU_LE( rev64 v16.16b, v16.16b ) -CPU_LE( rev64 v17.16b, v17.16b ) -CPU_LE( rev64 v18.16b, v18.16b ) -CPU_LE( rev64 v19.16b, v19.16b ) - - mov x4, x3 // rc pointer - - mov v0.16b, v8.16b - mov v1.16b, v9.16b - mov v2.16b, v10.16b - mov v3.16b, v11.16b - - // v0 ab cd -- ef gh ab - // v1 cd -- ef gh ab cd - // v2 ef gh ab cd -- ef - // v3 gh ab cd -- ef gh - // v4 -- ef gh ab cd -- - - dround 0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17 - dround 3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18 - dround 2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19 - dround 4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12 - dround 1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13 - - dround 0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14 - dround 3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15 - dround 2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16 - dround 4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17 - dround 1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18 - - dround 0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19 - dround 3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12 - dround 2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13 - dround 4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14 - dround 1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15 - - dround 0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16 - dround 3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17 - dround 2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18 - dround 4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19 - dround 1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12 - - dround 0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13 - dround 3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14 - dround 2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15 - dround 4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16 - dround 1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17 - - dround 0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18 - dround 3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19 - dround 2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12 - dround 4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13 - dround 1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14 - - dround 0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15 - dround 3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16 - dround 2, 3, 1, 4, 0, 28, 24, 12 - dround 4, 2, 0, 1, 3, 
29, 25, 13 - dround 1, 4, 3, 0, 2, 30, 26, 14 - - dround 0, 1, 2, 3, 4, 31, 27, 15 - dround 3, 0, 4, 2, 1, 24, , 16 - dround 2, 3, 1, 4, 0, 25, , 17 - dround 4, 2, 0, 1, 3, 26, , 18 - dround 1, 4, 3, 0, 2, 27, , 19 - - /* update state */ - add v8.2d, v8.2d, v0.2d - add v9.2d, v9.2d, v1.2d - add v10.2d, v10.2d, v2.2d - add v11.2d, v11.2d, v3.2d - - /* handled all input blocks? */ - cbz w21, 3f - - if_will_cond_yield_neon - st1 {v8.2d-v11.2d}, [x19] - do_cond_yield_neon - b 0b - endif_yield_neon - - b 1b - - /* store new state */ -3: st1 {v8.2d-v11.2d}, [x19] - frame_pop - ret -ENDPROC(sha512_ce_transform) diff --git a/arch/arm64/crypto/sm3-ce-core.S b/arch/arm64/crypto/sm3-ce-core.S deleted file mode 100644 index d50d187906cbe464cf31f898a855a522af8b1be6..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sm3-ce-core.S +++ /dev/null @@ -1,138 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions - * - * Copyright (C) 2018 Linaro Ltd - */ - -#include -#include - - .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 - .set .Lv\b\().4s, \b - .endr - - .macro sm3partw1, rd, rn, rm - .inst 0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sm3partw2, rd, rn, rm - .inst 0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16) - .endm - - .macro sm3ss1, rd, rn, rm, ra - .inst 0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16) - .endm - - .macro sm3tt1a, rd, rn, rm, imm2 - .inst 0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt1b, rd, rn, rm, imm2 - .inst 0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt2a, rd, rn, rm, imm2 - .inst 0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro sm3tt2b, rd, rn, rm, imm2 - .inst 0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16) - .endm - - .macro round, ab, s0, t0, t1, i - sm3ss1 v5.4s, v8.4s, \t0\().4s, v9.4s - shl \t1\().4s, \t0\().4s, #1 - sri \t1\().4s, \t0\().4s, #31 - sm3tt1\ab v8.4s, v5.4s, v10.4s, \i - sm3tt2\ab v9.4s, v5.4s, \s0\().4s, \i - .endm - - .macro qround, ab, s0, s1, s2, s3, s4 - .ifnb \s4 - ext \s4\().16b, \s1\().16b, \s2\().16b, #12 - ext v6.16b, \s0\().16b, \s1\().16b, #12 - ext v7.16b, \s2\().16b, \s3\().16b, #8 - sm3partw1 \s4\().4s, \s0\().4s, \s3\().4s - .endif - - eor v10.16b, \s0\().16b, \s1\().16b - - round \ab, \s0, v11, v12, 0 - round \ab, \s0, v12, v11, 1 - round \ab, \s0, v11, v12, 2 - round \ab, \s0, v12, v11, 3 - - .ifnb \s4 - sm3partw2 \s4\().4s, v7.4s, v6.4s - .endif - .endm - - /* - * void sm3_ce_transform(struct sm3_state *sst, u8 const *src, - * int blocks) - */ - .text -ENTRY(sm3_ce_transform) - /* load state */ - ld1 {v8.4s-v9.4s}, [x0] - rev64 v8.4s, v8.4s - rev64 v9.4s, v9.4s - ext v8.16b, v8.16b, v8.16b, #8 - ext v9.16b, v9.16b, v9.16b, #8 - - adr_l x8, .Lt - ldp s13, s14, [x8] - - /* load input */ -0: ld1 {v0.16b-v3.16b}, [x1], #64 - sub w2, w2, #1 - - mov v15.16b, v8.16b - mov v16.16b, v9.16b - -CPU_LE( rev32 v0.16b, v0.16b ) -CPU_LE( rev32 v1.16b, v1.16b ) -CPU_LE( rev32 v2.16b, v2.16b ) -CPU_LE( rev32 v3.16b, v3.16b ) - - ext v11.16b, v13.16b, v13.16b, #4 - - qround a, v0, v1, v2, v3, v4 - qround a, v1, v2, v3, v4, v0 - qround a, v2, v3, v4, v0, v1 - qround a, v3, v4, v0, v1, v2 - - ext v11.16b, v14.16b, v14.16b, #4 - - qround b, v4, v0, v1, v2, v3 - qround b, v0, v1, v2, v3, v4 - qround b, v1, v2, v3, v4, v0 - qround b, v2, v3, v4, v0, v1 - qround b, v3, v4, v0, v1, v2 - 
qround b, v4, v0, v1, v2, v3 - qround b, v0, v1, v2, v3, v4 - qround b, v1, v2, v3, v4, v0 - qround b, v2, v3, v4, v0, v1 - qround b, v3, v4 - qround b, v4, v0 - qround b, v0, v1 - - eor v8.16b, v8.16b, v15.16b - eor v9.16b, v9.16b, v16.16b - - /* handled all input blocks? */ - cbnz w2, 0b - - /* save state */ - rev64 v8.4s, v8.4s - rev64 v9.4s, v9.4s - ext v8.16b, v8.16b, v8.16b, #8 - ext v9.16b, v9.16b, v9.16b, #8 - st1 {v8.4s-v9.4s}, [x0] - ret -ENDPROC(sm3_ce_transform) - - .section ".rodata", "a" - .align 3 -.Lt: .word 0x79cc4519, 0x9d8a7a87 diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S deleted file mode 100644 index af3bfbc3f4d4d44ec2b5129d2073abd94075af27..0000000000000000000000000000000000000000 --- a/arch/arm64/crypto/sm4-ce-core.S +++ /dev/null @@ -1,36 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - - .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 - .set .Lv\b\().4s, \b - .endr - - .macro sm4e, rd, rn - .inst 0xcec08400 | .L\rd | (.L\rn << 5) - .endm - - /* - * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); - */ - .text -ENTRY(sm4_ce_do_crypt) - ld1 {v8.4s}, [x2] - ld1 {v0.4s-v3.4s}, [x0], #64 -CPU_LE( rev32 v8.16b, v8.16b ) - ld1 {v4.4s-v7.4s}, [x0] - sm4e v8.4s, v0.4s - sm4e v8.4s, v1.4s - sm4e v8.4s, v2.4s - sm4e v8.4s, v3.4s - sm4e v8.4s, v4.4s - sm4e v8.4s, v5.4s - sm4e v8.4s, v6.4s - sm4e v8.4s, v7.4s - rev64 v8.4s, v8.4s - ext v8.16b, v8.16b, v8.16b, #8 -CPU_LE( rev32 v8.16b, v8.16b ) - st1 {v8.4s}, [x1] - ret -ENDPROC(sm4_ce_do_crypt) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S deleted file mode 100644 index 6ea337d464c414ed67cd139071022500220604d1..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/cpu-reset.S +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#include -#include -#include -#include - -.text -.pushsection .idmap.text, "awx" - -/* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. - * - * @el2_switch: Flag to indicate a switch to EL2 is needed. - * @entry: Location to jump to for soft reset. - * arg0: First argument passed to @entry. (relocation list) - * arg1: Second argument passed to @entry.(physical kernel entry) - * arg2: Third argument passed to @entry. (physical dtb address) - * - * Put the CPU into the same state as it would be if it had been reset, and - * branch to what would be the reset vector. It must be executed with the - * flat identity mapping. - */ -ENTRY(__cpu_soft_restart) - /* Clear sctlr_el1 flags. */ - mrs x12, sctlr_el1 - ldr x13, =SCTLR_ELx_FLAGS - bic x12, x12, x13 - pre_disable_mmu_workaround - msr sctlr_el1, x12 - isb - - cbz x0, 1f // el2_switch? - mov x0, #HVC_SOFT_RESTART - hvc #0 // no return - -1: mov x18, x1 // entry - mov x0, x2 // arg0 - mov x1, x3 // arg1 - mov x2, x4 // arg2 - br x18 -ENDPROC(__cpu_soft_restart) - -.popsection diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S deleted file mode 100644 index 304d5b02ca6712a693f060c26c9567370b8af130..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-entry.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * EFI entry point. - * - * Copyright (C) 2013, 2014 Red Hat, Inc. 
- * Author: Mark Salter - */ -#include -#include - -#include - -#define EFI_LOAD_ERROR 0x8000000000000001 - - __INIT - - /* - * We arrive here from the EFI boot manager with: - * - * * CPU in little-endian mode - * * MMU on with identity-mapped RAM - * * Icache and Dcache on - * - * We will most likely be running from some place other than where - * we want to be. The kernel image wants to be placed at TEXT_OFFSET - * from start of RAM. - */ -ENTRY(entry) - /* - * Create a stack frame to save FP/LR with extra space - * for image_addr variable passed to efi_entry(). - */ - stp x29, x30, [sp, #-32]! - mov x29, sp - - /* - * Call efi_entry to do the real work. - * x0 and x1 are already set up by firmware. Current runtime - * address of image is calculated and passed via *image_addr. - * - * unsigned long efi_entry(void *handle, - * efi_system_table_t *sys_table, - * unsigned long *image_addr) ; - */ - adr_l x8, _text - add x2, sp, 16 - str x8, [x2] - bl efi_entry - cmn x0, #1 - b.eq efi_load_fail - - /* - * efi_entry() will have copied the kernel image if necessary and we - * return here with device tree address in x0 and the kernel entry - * point stored at *image_addr. Save those values in registers which - * are callee preserved. - */ - mov x20, x0 // DTB address - ldr x0, [sp, #16] // relocated _text address - ldr w21, =stext_offset - add x21, x0, x21 - - /* - * Calculate size of the kernel Image (same for original and copy). - */ - adr_l x1, _text - adr_l x2, _edata - sub x1, x2, x1 - - /* - * Flush the copied Image to the PoC, and ensure it is not shadowed by - * stale icache entries from before relocation. - */ - bl __flush_dcache_area - ic ialluis - - /* - * Ensure that the rest of this function (in the original Image) is - * visible when the caches are disabled. The I-cache can't have stale - * entries for the VA range of the current image, so no maintenance is - * necessary. - */ - adr x0, entry - adr x1, entry_end - sub x1, x1, x0 - bl __flush_dcache_area - - /* Turn off Dcache and MMU */ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - bic x0, x0, #1 << 0 // clear SCTLR.M - bic x0, x0, #1 << 2 // clear SCTLR.C - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb - b 2f -1: - mrs x0, sctlr_el1 - bic x0, x0, #1 << 0 // clear SCTLR.M - bic x0, x0, #1 << 2 // clear SCTLR.C - pre_disable_mmu_workaround - msr sctlr_el1, x0 - isb -2: - /* Jump to kernel entry point */ - mov x0, x20 - mov x1, xzr - mov x2, xzr - mov x3, xzr - br x21 - -efi_load_fail: - mov x0, #EFI_LOAD_ERROR - ldp x29, x30, [sp], #32 - ret - -entry_end: -ENDPROC(entry) diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S deleted file mode 100644 index a7cfacce3e15775bfa662d5458dfe6d90c99a10f..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-header.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 - 2017 Linaro, Ltd. - * Copyright (C) 2013, 2014 Red Hat, Inc. 
- */ - -#include -#include - - .macro __EFI_PE_HEADER - .long PE_MAGIC -coff_header: - .short IMAGE_FILE_MACHINE_ARM64 // Machine - .short section_count // NumberOfSections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 0 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short IMAGE_FILE_DEBUG_STRIPPED | \ - IMAGE_FILE_EXECUTABLE_IMAGE | \ - IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics - -optional_header: - .short PE_OPT_MAGIC_PE32PLUS // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long __initdata_begin - efi_header_end // SizeOfCode - .long __pecoff_data_size // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long SZ_4K // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) / 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - .ascii ".text\0\0\0" - .long __initdata_begin - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long __initdata_begin - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long IMAGE_SCN_CNT_CODE | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_EXECUTE // Characteristics - - .ascii ".data\0\0\0" - .long __pecoff_data_size // VirtualSize - .long __initdata_begin - _head // VirtualAddress - .long __pecoff_data_rawsize // SizeOfRawData - .long __initdata_begin - _head // PointerToRawData - - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ - IMAGE_SCN_MEM_READ | \ - IMAGE_SCN_MEM_WRITE // Characteristics - - .set section_count, (. - section_table) / 40 - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. 
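The debug directory entry emitted below is easier to parse as a C struct. This is an illustrative view only (field names follow the PE/COFF specification, not a kernel header); the 28-byte layout matches the .long/.short sequence at efi_debug_table:

	#include <stdint.h>

	/* PE/COFF IMAGE_DEBUG_DIRECTORY entry, as hand-assembled below. */
	struct image_debug_directory {
		uint32_t characteristics;	/* 0 */
		uint32_t time_date_stamp;	/* 0 */
		uint16_t major_version;		/* 0 */
		uint16_t minor_version;		/* 0 */
		uint32_t type;			/* IMAGE_DEBUG_TYPE_CODEVIEW */
		uint32_t size_of_data;		/* efi_debug_entry_size */
		uint32_t rva;			/* 0 - legal, per the note above */
		uint32_t file_offset;		/* efi_debug_entry - _head */
	};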
- */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long IMAGE_DEBUG_TYPE_CODEVIEW // Type - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . - efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ - .align 12 -efi_header_end: - .endm diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S deleted file mode 100644 index 3fc71106cb2b45eb0dd7091c7da824856bae4ea9..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2018 Linaro Ltd - */ - -#include - -ENTRY(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-32]! - mov x29, sp - - /* - * Register x18 is designated as the 'platform' register by the AAPCS, - * which means firmware running at the same exception level as the OS - * (such as UEFI) should never touch it. - */ - stp x1, x18, [sp, #16] - - /* - * We are lucky enough that no EFI runtime services take more than - * 5 arguments, so all are passed in registers rather than via the - * stack. - */ - mov x8, x0 - mov x0, x2 - mov x1, x3 - mov x2, x4 - mov x3, x5 - mov x4, x6 - blr x8 - - ldp x1, x2, [sp, #16] - cmp x2, x18 - ldp x29, x30, [sp], #32 - b.ne 0f - ret -0: b efi_handle_corrupted_x18 // tail call -ENDPROC(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S deleted file mode 100644 index 0f24eae8f3cceccee6bf4f20b19eb31d4e67ce83..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry-fpsimd.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * FP/SIMD state saving and restoring - * - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas - */ - -#include - -#include -#include - -/* - * Save the FP registers. - * - * x0 - pointer to struct fpsimd_state - */ -ENTRY(fpsimd_save_state) - fpsimd_save x0, 8 - ret -ENDPROC(fpsimd_save_state) - -/* - * Load the FP registers. 
- * - * x0 - pointer to struct fpsimd_state - */ -ENTRY(fpsimd_load_state) - fpsimd_restore x0, 8 - ret -ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_ARM64_SVE -ENTRY(sve_save_state) - sve_save 0, x1, 2 - ret -ENDPROC(sve_save_state) - -ENTRY(sve_load_state) - sve_load 0, x1, x2, 3, x4 - ret -ENDPROC(sve_load_state) - -ENTRY(sve_get_vl) - _sve_rdvl 0, 1 - ret -ENDPROC(sve_get_vl) -#endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S deleted file mode 100644 index 7d02f9966d3452233329959ce0b91dce4178c042..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry-ftrace.S +++ /dev/null @@ -1,344 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/kernel/entry-ftrace.S - * - * Copyright (C) 2013 Linaro Limited - * Author: AKASHI Takahiro - */ - -#include -#include -#include -#include -#include - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -/* - * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before - * the regular function prologue. For an enabled callsite, ftrace_init_nop() and - * ftrace_make_call() have patched those NOPs to: - * - * MOV X9, LR - * BL - * - * ... where is either ftrace_caller or ftrace_regs_caller. - * - * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are - * live, and x9-x18 are safe to clobber. - * - * We save the callsite's context into a pt_regs before invoking any ftrace - * callbacks. So that we can get a sensible backtrace, we create a stack record - * for the callsite and the ftrace entry assembly. This is not sufficient for - * reliable stacktrace: until we create the callsite stack record, its caller - * is missing from the LR and existing chain of frame records. - */ - .macro ftrace_regs_entry, allregs=0 - /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) - - /* Save function arguments (and x9 for simplicity) */ - stp x0, x1, [sp, #S_X0] - stp x2, x3, [sp, #S_X2] - stp x4, x5, [sp, #S_X4] - stp x6, x7, [sp, #S_X6] - stp x8, x9, [sp, #S_X8] - - /* Optionally save the callee-saved registers, always save the FP */ - .if \allregs == 1 - stp x10, x11, [sp, #S_X10] - stp x12, x13, [sp, #S_X12] - stp x14, x15, [sp, #S_X14] - stp x16, x17, [sp, #S_X16] - stp x18, x19, [sp, #S_X18] - stp x20, x21, [sp, #S_X20] - stp x22, x23, [sp, #S_X22] - stp x24, x25, [sp, #S_X24] - stp x26, x27, [sp, #S_X26] - stp x28, x29, [sp, #S_X28] - .else - str x29, [sp, #S_FP] - .endif - - /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) - stp x9, x10, [sp, #S_LR] - - /* Save the PC after the ftrace callsite */ - str x30, [sp, #S_PC] - - /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE - - /* Create our frame record within pt_regs. */ - stp x29, x30, [sp, #S_STACKFRAME] - add x29, sp, #S_STACKFRAME - .endm - -ENTRY(ftrace_regs_caller) - ftrace_regs_entry 1 - b ftrace_common -ENDPROC(ftrace_regs_caller) - -ENTRY(ftrace_caller) - ftrace_regs_entry 0 - b ftrace_common -ENDPROC(ftrace_caller) - -ENTRY(ftrace_common) - sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) - mov x1, x9 // parent_ip (callsite's LR) - ldr_l x2, function_trace_op // op - mov x3, sp // regs - -GLOBAL(ftrace_call) - bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); - nop // If enabled, this will be replaced - // "b ftrace_graph_caller" -#endif - -/* - * At the callsite x0-x8 and x19-x30 were live. 
Any C code will have preserved - * x19-x29 per the AAPCS, and we created frame records upon entry, so we need - * to restore x0-x8, x29, and x30. - */ -ftrace_common_return: - /* Restore function arguments */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x8, [sp, #S_X8] - - /* Restore the callsite's FP, LR, PC */ - ldr x29, [sp, #S_FP] - ldr x30, [sp, #S_LR] - ldr x9, [sp, #S_PC] - - /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 - - ret x9 -ENDPROC(ftrace_common) - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - ldr x0, [sp, #S_PC] - sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) - add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) - bl prepare_ftrace_return - b ftrace_common_return -ENDPROC(ftrace_graph_caller) -#endif - -#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ - -/* - * Gcc with -pg will put the following code in the beginning of each function: - * mov x0, x30 - * bl _mcount - * [function's body ...] - * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic - * ftrace is enabled. - * - * Please note that x0 as an argument will not be used here because we can - * get lr(x30) of instrumented function at any time by winding up call stack - * as long as the kernel is compiled without -fomit-frame-pointer. - * (or CONFIG_FRAME_POINTER, this is forced on arm64) - * - * stack layout after mcount_enter in _mcount(): - * - * current sp/fp => 0:+-----+ - * in _mcount() | x29 | -> instrumented function's fp - * +-----+ - * | x30 | -> _mcount()'s lr (= instrumented function's pc) - * old sp => +16:+-----+ - * when instrumented | | - * function calls | ... | - * _mcount() | | - * | | - * instrumented => +xx:+-----+ - * function's fp | x29 | -> parent's fp - * +-----+ - * | x30 | -> instrumented function's lr (= parent's pc) - * +-----+ - * | ... | - */ - - .macro mcount_enter - stp x29, x30, [sp, #-16]! 
- mov x29, sp - .endm - - .macro mcount_exit - ldp x29, x30, [sp], #16 - ret - .endm - - .macro mcount_adjust_addr rd, rn - sub \rd, \rn, #AARCH64_INSN_SIZE - .endm - - /* for instrumented function's parent */ - .macro mcount_get_parent_fp reg - ldr \reg, [x29] - ldr \reg, [\reg] - .endm - - /* for instrumented function */ - .macro mcount_get_pc0 reg - mcount_adjust_addr \reg, x30 - .endm - - .macro mcount_get_pc reg - ldr \reg, [x29, #8] - mcount_adjust_addr \reg, \reg - .endm - - .macro mcount_get_lr reg - ldr \reg, [x29] - ldr \reg, [\reg, #8] - .endm - - .macro mcount_get_lr_addr reg - ldr \reg, [x29] - add \reg, \reg, #8 - .endm - -#ifndef CONFIG_DYNAMIC_FTRACE -/* - * void _mcount(unsigned long return_address) - * @return_address: return address to instrumented function - * - * This function makes calls, if enabled, to: - * - tracer function to probe instrumented function's entry, - * - ftrace_graph_caller to set up an exit hook - */ -ENTRY(_mcount) - mcount_enter - - ldr_l x2, ftrace_trace_function - adr x0, ftrace_stub - cmp x0, x2 // if (ftrace_trace_function - b.eq skip_ftrace_call // != ftrace_stub) { - - mcount_get_pc x0 // function's pc - mcount_get_lr x1 // function's lr (= parent's pc) - blr x2 // (*ftrace_trace_function)(pc, lr); - -skip_ftrace_call: // } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr_l x2, ftrace_graph_return - cmp x0, x2 // if ((ftrace_graph_return - b.ne ftrace_graph_caller // != ftrace_stub) - - ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry - adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) - cmp x0, x2 - b.ne ftrace_graph_caller // ftrace_graph_caller(); -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - mcount_exit -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) -NOKPROBE(_mcount) - -#else /* CONFIG_DYNAMIC_FTRACE */ -/* - * _mcount() is used to build the kernel with -pg option, but all the branch - * instructions to _mcount() are replaced to NOP initially at kernel start up, - * and later on, NOP to branch to ftrace_caller() when enabled or branch to - * NOP when disabled per-function base. - */ -ENTRY(_mcount) - ret -ENDPROC(_mcount) -EXPORT_SYMBOL(_mcount) -NOKPROBE(_mcount) - -/* - * void ftrace_caller(unsigned long return_address) - * @return_address: return address to instrumented function - * - * This function is a counterpart of _mcount() in 'static' ftrace, and - * makes calls to: - * - tracer function to probe instrumented function's entry, - * - ftrace_graph_caller to set up an exit hook - */ -ENTRY(ftrace_caller) - mcount_enter - - mcount_get_pc0 x0 // function's pc - mcount_get_lr x1 // function's lr - -GLOBAL(ftrace_call) // tracer(pc, lr); - nop // This will be replaced with "bl xxx" - // where xxx can be any kind of tracer. - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); - nop // If enabled, this will be replaced - // "b ftrace_graph_caller" -#endif - - mcount_exit -ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * void ftrace_graph_caller(void) - * - * Called from _mcount() or ftrace_caller() when function_graph tracer is - * selected. - * This function w/ prepare_ftrace_return() fakes link register's value on - * the call stack in order to intercept instrumented function's return path - * and run return_to_handler() later on its exit. 
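The return-path interception described above is clearer in C. A sketch of prepare_ftrace_return(), modelled on arch/arm64/kernel/ftrace.c (treat it as an outline, not the exact kernel code):

	/* Divert the saved LR so the traced function "returns" into
	 * return_to_handler(); the genuine return address is recorded on
	 * the per-task ret_stack by function_graph_enter(). */
	void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
				   unsigned long frame_pointer)
	{
		unsigned long return_hooker = (unsigned long)&return_to_handler;
		unsigned long old = *parent;	/* the real return address */

		if (unlikely(atomic_read(&current->tracing_graph_pause)))
			return;

		if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
			*parent = return_hooker;
	}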
- */ -ENTRY(ftrace_graph_caller) - mcount_get_pc x0 // function's pc - mcount_get_lr_addr x1 // pointer to function's saved lr - mcount_get_parent_fp x2 // parent's fp - bl prepare_ftrace_return // prepare_ftrace_return(pc, &lr, fp) - - mcount_exit -ENDPROC(ftrace_graph_caller) -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ - -ENTRY(ftrace_stub) - ret -ENDPROC(ftrace_stub) - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* - * void return_to_handler(void) - * - * Run ftrace_return_to_handler() before going back to parent. - * @fp is checked against the value passed by ftrace_graph_caller(). - */ -ENTRY(return_to_handler) - /* save return value regs */ - sub sp, sp, #64 - stp x0, x1, [sp] - stp x2, x3, [sp, #16] - stp x4, x5, [sp, #32] - stp x6, x7, [sp, #48] - - mov x0, x29 // parent's fp - bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); - mov x30, x0 // restore the original return address - - /* restore return value regs */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #16] - ldp x4, x5, [sp, #32] - ldp x6, x7, [sp, #48] - add sp, sp, #64 - - ret -END(return_to_handler) -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S deleted file mode 100644 index cf3bd2976e5747ff5c5b22ee0665700476133816..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/entry.S +++ /dev/null @@ -1,1350 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low-level exception handling code - * - * Copyright (C) 2012 ARM Ltd. - * Authors: Catalin Marinas - * Will Deacon - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. - */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING - bl enter_from_user_mode -#endif - .endm - - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter -#endif - .endm - - .macro clear_gp_regs - .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 - mov x\n, xzr - .endr - .endm - -/* - * Bad Abort numbers - *----------------- - */ -#define BAD_SYNC 0 -#define BAD_IRQ 1 -#define BAD_FIQ 2 -#define BAD_ERROR 3 - - .macro kernel_ventry, el, label, regsize = 64 - .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 - .if \el == 0 - .if \regsize == 64 - mrs x30, tpidrro_el0 - msr tpidrro_el0, xzr - .else - mov x30, xzr - .endif - .endif -alternative_else_nop_endif -#endif - - sub sp, sp, #S_FRAME_SIZE -#ifdef CONFIG_VMAP_STACK - /* - * Test whether the SP has overflowed, without corrupting a GPR. - * Task and IRQ stacks are aligned to (1 << THREAD_SHIFT). - */ - add sp, sp, x0 // sp' = sp + x0 - sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp - tbnz x0, #THREAD_SHIFT, 0f - sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 - sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - b el\()\el\()_\label - -0: - /* - * Either we've just detected an overflow, or we've taken an exception - * while on the overflow stack. Either way, we won't return to - * userspace, and can clobber EL0 registers to free up GPRs. - */ - - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. 
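A note on the overflow test a few lines up: the add/sub shuffle exists only to compute the decremented SP without spilling a general-purpose register. Logically the check is a single bit, on the assumption (arranged by the VMAP_STACK configuration) that each THREAD_SIZE stack lives in the low half of a region aligned to twice its size:

	/* Sketch: any SP that has run off the bottom of its stack has bit
	 * THREAD_SHIFT set; any in-range SP (after the S_FRAME_SIZE
	 * decrement) has it clear. */
	static inline int sp_overflowed(unsigned long sp)
	{
		return (sp & (1UL << THREAD_SHIFT)) != 0;
	}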
*/ - msr tpidr_el0, x0 - - /* Recover the original x0 value and stash it in tpidrro_el0 */ - sub x0, sp, x0 - msr tpidrro_el0, x0 - - /* Switch to the overflow stack */ - adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 - - /* - * Check whether we were already on the overflow stack. This may happen - * after panic() re-enables interrupts. - */ - mrs x0, tpidr_el0 // sp of interrupted context - sub x0, sp, x0 // delta with top of overflow stack - tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range? - b.ne __bad_stack // no? -> bad stack pointer - - /* We were already on the overflow stack. Restore sp/x0 and carry on. */ - sub sp, sp, x0 - mrs x0, tpidrro_el0 -#endif - b el\()\el\()_\label - .endm - - .macro tramp_alias, dst, sym - mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) - .endm - - // This macro corrupts x0-x3. It is the caller's duty - // to save/restore them if required. - .macro apply_ssbd, state, tmp1, tmp2 -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b .L__asm_ssbd_skip\@ -alternative_cb_end - ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 - cbz \tmp2, .L__asm_ssbd_skip\@ - ldr \tmp2, [tsk, #TSK_TI_FLAGS] - tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - mov w1, #\state -alternative_cb arm64_update_smccc_conduit - nop // Patched to SMC/HVC #0 -alternative_cb_end -.L__asm_ssbd_skip\@: -#endif - .endm - - .macro kernel_entry, el, regsize = 64 - .if \regsize == 32 - mov w0, w0 // zero upper 32 bits of x0 - .endif - stp x0, x1, [sp, #16 * 0] - stp x2, x3, [sp, #16 * 1] - stp x4, x5, [sp, #16 * 2] - stp x6, x7, [sp, #16 * 3] - stp x8, x9, [sp, #16 * 4] - stp x10, x11, [sp, #16 * 5] - stp x12, x13, [sp, #16 * 6] - stp x14, x15, [sp, #16 * 7] - stp x16, x17, [sp, #16 * 8] - stp x18, x19, [sp, #16 * 9] - stp x20, x21, [sp, #16 * 10] - stp x22, x23, [sp, #16 * 11] - stp x24, x25, [sp, #16 * 12] - stp x26, x27, [sp, #16 * 13] - stp x28, x29, [sp, #16 * 14] - - .if \el == 0 - clear_gp_regs - mrs x21, sp_el0 - ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug - disable_step_tsk x19, x20 // exceptions when scheduling. - - apply_ssbd 1, x22, x23 - - .else - add x21, sp, #S_FRAME_SIZE - get_current_task tsk - /* Save the task's original addr_limit and set USER_DS */ - ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] - str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #USER_DS - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ - .endif /* \el == 0 */ - mrs x22, elr_el1 - mrs x23, spsr_el1 - stp lr, x21, [sp, #S_LR] - - /* - * In order to be able to dump the contents of struct pt_regs at the - * time the exception was taken (in case we attempt to walk the call - * stack later), chain it together with the stack frames. - */ - .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] - .else - stp x29, x22, [sp, #S_STACKFRAME] - .endif - add x29, sp, #S_STACKFRAME - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Set the TTBR0 PAN bit in SPSR. When the exception is taken from - * EL0, there is no need to check the state of TTBR0_EL1 since - * accesses are always enabled. - * Note that the meaning of this bit differs from the ARMv8.1 PAN - * feature as all TTBR0_EL1 accesses are disabled, not just those to - * user mappings. 
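In pseudo-C, the entry-side bookkeeping below amounts to the following (helper names are loose; the mechanism is the reserved-ASID table swap described above):

	/* On entry from EL1, note whether a uaccess window was open, then
	 * point TTBR0_EL1 at the reserved zero page (ASID 0). */
	if (taken_from_el1) {
		if (read_sysreg(ttbr0_el1) & TTBR_ASID_MASK)
			saved_spsr &= ~PSR_PAN_BIT;	/* uaccess was live */
		else
			saved_spsr |= PSR_PAN_BIT;	/* already disabled */
	}
	__uaccess_ttbr0_disable();			/* always, on the way in */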
- */ -alternative_if ARM64_HAS_PAN - b 1f // skip TTBR0 PAN -alternative_else_nop_endif - - .if \el != 0 - mrs x21, ttbr0_el1 - tst x21, #TTBR_ASID_MASK // Check for the reserved ASID - orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR - b.eq 1f // TTBR0 access already disabled - and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR - .endif - - __uaccess_ttbr0_disable x21 -1: -#endif - - stp x22, x23, [sp, #S_PC] - - /* Not in a syscall by default (el0_svc overwrites for real syscall) */ - .if \el == 0 - mov w21, #NO_SYSCALL - str w21, [sp, #S_SYSCALLNO] - .endif - - /* - * Set sp_el0 to current thread_info. - */ - .if \el == 0 - msr sp_el0, tsk - .endif - - /* Save pmr */ -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - mrs_s x20, SYS_ICC_PMR_EL1 - str x20, [sp, #S_PMR_SAVE] -alternative_else_nop_endif - - /* - * Registers that may be useful after this macro is invoked: - * - * x20 - ICC_PMR_EL1 - * x21 - aborted SP - * x22 - aborted PC - * x23 - aborted PSTATE - */ - .endm - - .macro kernel_exit, el - .if \el != 0 - disable_daif - - /* Restore the task's original addr_limit. */ - ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - - /* No need to restore UAO, it will be restored from SPSR_EL1 */ - .endif - - /* Restore pmr */ -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] - msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy -alternative_else_nop_endif - - ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR - * PAN bit checking. - */ -alternative_if ARM64_HAS_PAN - b 2f // skip TTBR0 PAN -alternative_else_nop_endif - - .if \el != 0 - tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set - .endif - - __uaccess_ttbr0_enable x0, x1 - - .if \el == 0 - /* - * Enable errata workarounds only if returning to user. The only - * workaround currently required for TTBR0_EL1 changes are for the - * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache - * corruption). - */ - bl post_ttbr_update_workaround - .endif -1: - .if \el != 0 - and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit - .endif -2: -#endif - - .if \el == 0 - ldr x23, [sp, #S_SP] // load return stack pointer - msr sp_el0, x23 - tst x22, #PSR_MODE32_BIT // native task? 
- b.eq 3f - -#ifdef CONFIG_ARM64_ERRATUM_845719 -alternative_if ARM64_WORKAROUND_845719 -#ifdef CONFIG_PID_IN_CONTEXTIDR - mrs x29, contextidr_el1 - msr contextidr_el1, x29 -#else - msr contextidr_el1, xzr -#endif -alternative_else_nop_endif -#endif -3: -#ifdef CONFIG_ARM64_ERRATUM_1418040 -alternative_if_not ARM64_WORKAROUND_1418040 - b 4f -alternative_else_nop_endif - /* - * if (x22.mode32 == cntkctl_el1.el0vcten) - * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten - */ - mrs x1, cntkctl_el1 - eon x0, x1, x22, lsr #3 - tbz x0, #1, 4f - eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN - msr cntkctl_el1, x1 -4: -#endif - apply_ssbd 0, x0, x1 - .endif - - msr elr_el1, x21 // set up the return data - msr spsr_el1, x22 - ldp x0, x1, [sp, #16 * 0] - ldp x2, x3, [sp, #16 * 1] - ldp x4, x5, [sp, #16 * 2] - ldp x6, x7, [sp, #16 * 3] - ldp x8, x9, [sp, #16 * 4] - ldp x10, x11, [sp, #16 * 5] - ldp x12, x13, [sp, #16 * 6] - ldp x14, x15, [sp, #16 * 7] - ldp x16, x17, [sp, #16 * 8] - ldp x18, x19, [sp, #16 * 9] - ldp x20, x21, [sp, #16 * 10] - ldp x22, x23, [sp, #16 * 11] - ldp x24, x25, [sp, #16 * 12] - ldp x26, x27, [sp, #16 * 13] - ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp - - .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 5f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native - br x30 -5: - tramp_alias x30, tramp_exit_compat - br x30 -#endif - .else - eret - .endif - sb - .endm - - .macro irq_stack_entry - mov x19, sp // preserve the original sp - - /* - * Compare sp with the base of the task stack. - * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, - * and should switch to the irq stack. - */ - ldr x25, [tsk, TSK_STACK] - eor x25, x25, x19 - and x25, x25, #~(THREAD_SIZE - 1) - cbnz x25, 9998f - - ldr_this_cpu x25, irq_stack_ptr, x26 - mov x26, #IRQ_STACK_SIZE - add x26, x25, x26 - - /* switch to the irq stack */ - mov sp, x26 -9998: - .endm - - /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. - */ - .macro irq_stack_exit - mov sp, x19 - .endm - -/* GPRs used by entry code */ -tsk .req x28 // current thread_info - -/* - * Interrupt handling. - */ - .macro irq_handler - ldr_l x1, handle_arch_irq - mov x0, sp - irq_stack_entry - blr x1 - irq_stack_exit - .endm - -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * Set res to 0 if irqs were unmasked in interrupted context. - * Otherwise set res to non-0 value. - */ - .macro test_irqs_unmasked res:req, pmr:req -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - sub \res, \pmr, #GIC_PRIO_IRQON -alternative_else - mov \res, xzr -alternative_endif - .endm -#endif - - .macro gic_prio_kentry_setup, tmp:req -#ifdef CONFIG_ARM64_PSEUDO_NMI - alternative_if ARM64_HAS_IRQ_PRIO_MASKING - mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) - msr_s SYS_ICC_PMR_EL1, \tmp - alternative_else_nop_endif -#endif - .endm - - .macro gic_prio_irq_setup, pmr:req, tmp:req -#ifdef CONFIG_ARM64_PSEUDO_NMI - alternative_if ARM64_HAS_IRQ_PRIO_MASKING - orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET - msr_s SYS_ICC_PMR_EL1, \tmp - alternative_else_nop_endif -#endif - .endm - - .text - -/* - * Exception vectors. 
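For orientation, the table defined below follows the fixed VBAR_EL1 format: sixteen slots of 128 bytes each (hence .align 7 on kernel_ventry), in four groups of four. Sketched as a C comment:

	/*
	 * VBAR_EL1 + 0x000/080/100/180  Sync/IRQ/FIQ/SError, EL1 with SP_EL0 (EL1t)
	 * VBAR_EL1 + 0x200/280/300/380  Sync/IRQ/FIQ/SError, EL1 with SP_EL1 (EL1h)
	 * VBAR_EL1 + 0x400/480/500/580  Sync/IRQ/FIQ/SError, 64-bit EL0
	 * VBAR_EL1 + 0x600/680/700/780  Sync/IRQ/FIQ/SError, 32-bit EL0
	 */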
- */ - .pushsection ".entry.text", "ax" - - .align 11 -ENTRY(vectors) - kernel_ventry 1, sync_invalid // Synchronous EL1t - kernel_ventry 1, irq_invalid // IRQ EL1t - kernel_ventry 1, fiq_invalid // FIQ EL1t - kernel_ventry 1, error_invalid // Error EL1t - - kernel_ventry 1, sync // Synchronous EL1h - kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h - kernel_ventry 1, error // Error EL1h - - kernel_ventry 0, sync // Synchronous 64-bit EL0 - kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 - kernel_ventry 0, error // Error 64-bit EL0 - -#ifdef CONFIG_COMPAT - kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 -#else - kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 - kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 - kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 -#endif -END(vectors) - -#ifdef CONFIG_VMAP_STACK - /* - * We detected an overflow in kernel_ventry, which switched to the - * overflow stack. Stash the exception regs, and head to our overflow - * handler. - */ -__bad_stack: - /* Restore the original x0 value */ - mrs x0, tpidrro_el0 - - /* - * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. - */ - sub sp, sp, #S_FRAME_SIZE - kernel_entry 1 - mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE - str x0, [sp, #S_SP] - - /* Stash the regs for handle_bad_stack */ - mov x0, sp - - /* Time to die */ - bl handle_bad_stack - ASM_BUG() -#endif /* CONFIG_VMAP_STACK */ - -/* - * Invalid mode handlers - */ - .macro inv_entry, el, reason, regsize = 64 - kernel_entry \el, \regsize - mov x0, sp - mov x1, #\reason - mrs x2, esr_el1 - bl bad_mode - ASM_BUG() - .endm - -el0_sync_invalid: - inv_entry 0, BAD_SYNC -ENDPROC(el0_sync_invalid) - -el0_irq_invalid: - inv_entry 0, BAD_IRQ -ENDPROC(el0_irq_invalid) - -el0_fiq_invalid: - inv_entry 0, BAD_FIQ -ENDPROC(el0_fiq_invalid) - -el0_error_invalid: - inv_entry 0, BAD_ERROR -ENDPROC(el0_error_invalid) - -#ifdef CONFIG_COMPAT -el0_fiq_invalid_compat: - inv_entry 0, BAD_FIQ, 32 -ENDPROC(el0_fiq_invalid_compat) -#endif - -el1_sync_invalid: - inv_entry 1, BAD_SYNC -ENDPROC(el1_sync_invalid) - -el1_irq_invalid: - inv_entry 1, BAD_IRQ -ENDPROC(el1_irq_invalid) - -el1_fiq_invalid: - inv_entry 1, BAD_FIQ -ENDPROC(el1_fiq_invalid) - -el1_error_invalid: - inv_entry 1, BAD_ERROR -ENDPROC(el1_error_invalid) - -/* - * EL1 mode handlers. 
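The cmp/b.eq ladder in el1_sync below is, morally, a switch on the exception-class (EC) field of ESR_EL1. A C sketch, with handler names mirroring the local labels (illustrative only; the constants are the <asm/esr.h> ones):

	void el1_sync_sketch(unsigned long esr)
	{
		switch (esr >> ESR_ELx_EC_SHIFT) {	/* EC = ESR_EL1[31:26] */
		case ESR_ELx_EC_DABT_CUR:	el1_da(esr);	break;
		case ESR_ELx_EC_IABT_CUR:	el1_ia(esr);	break;	/* falls into el1_da */
		case ESR_ELx_EC_SYS64:					/* configurable trap */
		case ESR_ELx_EC_UNKNOWN:	el1_undef(esr);	break;
		case ESR_ELx_EC_PC_ALIGN:	el1_pc(esr);	break;
		default:
			if ((esr >> ESR_ELx_EC_SHIFT) >= ESR_ELx_EC_BREAKPT_CUR)
				el1_dbg(esr);	/* breakpoint, step, watchpoint, BRK */
			else
				el1_inv(esr);
		}
	}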
- */ - .align 6 -el1_sync: - kernel_entry 1 - mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 - b.eq el1_da - cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 - b.eq el1_ia - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el1_undef - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 - b.eq el1_undef - cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 - b.ge el1_dbg - b el1_inv - -el1_ia: - /* - * Fall through to the Data abort case - */ -el1_da: - /* - * Data abort handling - */ - mrs x3, far_el1 - inherit_daif pstate=x23, tmp=x2 - untagged_addr x0, x3 - mov x2, sp // struct pt_regs - bl do_mem_abort - - kernel_exit 1 -el1_pc: - /* - * PC alignment exception handling. We don't handle SP alignment faults, - * since we will have hit a recursive exception when trying to push the - * initial pt_regs. - */ - mrs x0, far_el1 - inherit_daif pstate=x23, tmp=x2 - mov x2, sp - bl do_sp_pc_abort - ASM_BUG() -el1_undef: - /* - * Undefined instruction - */ - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - bl do_undefinstr - kernel_exit 1 -el1_dbg: - /* - * Debug exception handling - */ - cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 - cinc x24, x24, eq // set bit '0' - tbz x24, #0, el1_inv // EL1 only - gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 - mov x2, sp // struct pt_regs - bl do_debug_exception - kernel_exit 1 -el1_inv: - // TODO: add support for undefined instructions in kernel mode - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - mov x2, x1 - mov x1, #BAD_SYNC - bl bad_mode - ASM_BUG() -ENDPROC(el1_sync) - - .align 6 -el1_irq: - kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - - irq_handler - -#ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif - - kernel_exit 1 -ENDPROC(el1_irq) - -/* - * EL0 mode handlers. 
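One detail worth flagging before the EL0 dispatch: el0_da below passes FAR_EL1 through untagged_addr before handing it to do_mem_abort, so a top-byte (TBI) tag cannot perturb fault handling. The operation is a sign-extension from bit 55; a self-contained equivalent:

	#include <stdint.h>

	/* Strip a TBI tag the way untagged_addr() does: sign-extend from
	 * bit 55, restoring 0xff on kernel addresses and 0x00 on user ones. */
	static inline uint64_t untag(uint64_t addr)
	{
		return (uint64_t)(((int64_t)addr << 8) >> 8);
	}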
- */ - .align 6 -el0_sync: - kernel_entry 0 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state - b.eq el0_svc - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_SVE // SVE access - b.eq el0_sve_acc - cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - ccmp x24, #ESR_ELx_EC_WFx, #4, ne - b.eq el0_sys - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv - -#ifdef CONFIG_COMPAT - .align 6 -el0_sync_compat: - kernel_entry 0, 32 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state - b.eq el0_svc_compat - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv -el0_svc_compat: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_compat_handler - b ret_to_user - - .align 6 -el0_irq_compat: - kernel_entry 0, 32 - b el0_irq_naked - -el0_error_compat: - kernel_entry 0, 32 - b el0_error_naked - -el0_cp15: - /* - * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_cp15instr - b ret_to_user -#endif - -el0_da: - /* - * Data abort handling - */ - mrs x26, far_el1 - ct_user_exit_irqoff - enable_daif - untagged_addr x0, x26 - mov x1, x25 - mov x2, sp - bl do_mem_abort - b ret_to_user -el0_ia: - /* - * Instruction abort handling - */ - mrs x26, far_el1 - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_el0_ia_bp_hardening - b ret_to_user -el0_fpsimd_acc: - /* - * Floating Point or Advanced SIMD access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_acc - b ret_to_user -el0_sve_acc: - /* - * Scalable Vector Extension access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sve_acc - b ret_to_user -el0_fpsimd_exc: - /* - * Floating Point, Advanced SIMD or SVE exception - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_exc - b ret_to_user -el0_sp: - ldr x26, [sp, #S_SP] - b el0_sp_pc -el0_pc: - mrs x26, 
far_el1 -el0_sp_pc: - /* - * Stack or PC alignment exception handling - */ - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_sp_pc_abort - b ret_to_user -el0_undef: - /* - * Undefined instruction - */ - ct_user_exit_irqoff - enable_daif - mov x0, sp - bl do_undefinstr - b ret_to_user -el0_sys: - /* - * System instructions, for trapped cache maintenance instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sysinstr - b ret_to_user -el0_dbg: - /* - * Debug exception handling - */ - tbnz x24, #0, el0_inv // EL0 only - mrs x24, far_el1 - gic_prio_kentry_setup tmp=x3 - ct_user_exit_irqoff - mov x0, x24 - mov x1, x25 - mov x2, sp - bl do_debug_exception - enable_da_f - b ret_to_user -el0_inv: - ct_user_exit_irqoff - enable_daif - mov x0, sp - mov x1, #BAD_SYNC - mov x2, x25 - bl bad_el0_sync - b ret_to_user -ENDPROC(el0_sync) - - .align 6 -el0_irq: - kernel_entry 0 -el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff - enable_da_f - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: -#endif - irq_handler - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif - b ret_to_user -ENDPROC(el0_irq) - -el1_error: - kernel_entry 1 - mrs x1, esr_el1 - gic_prio_kentry_setup tmp=x2 - enable_dbg - mov x0, sp - bl do_serror - kernel_exit 1 -ENDPROC(el1_error) - -el0_error: - kernel_entry 0 -el0_error_naked: - mrs x25, esr_el1 - gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff - enable_dbg - mov x0, sp - mov x1, x25 - bl do_serror - enable_da_f - b ret_to_user -ENDPROC(el0_error) - -/* - * Ok, we need to do extra processing, enter the slow path. - */ -work_pending: - mov x0, sp // 'regs' - bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user -/* - * "slow" syscall return path. - */ -ret_to_user: - disable_daif - gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK - cbnz x2, work_pending -finish_ret_to_user: - enable_step_tsk x1, x2 -#ifdef CONFIG_GCC_PLUGIN_STACKLEAK - bl stackleak_erase -#endif - kernel_exit 0 -ENDPROC(ret_to_user) - -/* - * SVC handler. - */ - .align 6 -el0_svc: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_handler - b ret_to_user -ENDPROC(el0_svc) - - .popsection // .entry.text - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. 
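The core of the trampoline is the tramp_map_kernel/tramp_unmap_kernel pair defined below. Because the user and kernel TTBR1 tables sit a fixed distance apart and differ only in the ASID flag, the switch is pure register arithmetic, with no memory access before the kernel is mapped. In pseudo-C (sysreg helpers as in the kernel sources):

	/* map: user tables -> kernel tables */
	u64 ttbr = read_sysreg(ttbr1_el1);
	ttbr += PAGE_SIZE + RESERVED_TTBR0_SIZE;	/* kernel tables follow the user copy */
	ttbr &= ~USER_ASID_FLAG;			/* select the kernel ASID */
	write_sysreg(ttbr, ttbr1_el1);
	/* unmap is the exact inverse: subtract the offset, set USER_ASID_FLAG */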
- */ - .pushsection ".entry.tramp.text", "ax" - - .macro tramp_map_kernel, tmp - mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) - bic \tmp, \tmp, #USER_ASID_FLAG - msr ttbr1_el1, \tmp -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 - /* ASID already in \tmp[63:48] */ - movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12) - movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12) - /* 2MB boundary containing the vectors, so we nobble the walk cache */ - movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12) - isb - tlbi vae1, \tmp - dsb nsh -alternative_else_nop_endif -#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ - .endm - - .macro tramp_unmap_kernel, tmp - mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) - orr \tmp, \tmp, #USER_ASID_FLAG - msr ttbr1_el1, \tmp - /* - * We avoid running the post_ttbr_update_workaround here because - * it's only needed by Cavium ThunderX, which requires KPTI to be - * disabled. - */ - .endm - - .macro tramp_ventry, regsize = 64 - .align 7 -1: - .if \regsize == 64 - msr tpidrro_el0, x30 // Restored in kernel_ventry - .endif - /* - * Defend against branch aliasing attacks by pushing a dummy - * entry onto the return stack and using a RET instruction to - * enter the full-fat kernel vectors. - */ - bl 2f - b . -2: - tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE -alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif -alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] -alternative_else_nop_endif - msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) - isb - ret - .endm - - .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors - msr vbar_el1, x30 - tramp_unmap_kernel x30 - .if \regsize == 64 - mrs x30, far_el1 - .endif - eret - sb - .endm - - .align 11 -ENTRY(tramp_vectors) - .space 0x400 - - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry - - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 -END(tramp_vectors) - -ENTRY(tramp_exit_native) - tramp_exit -END(tramp_exit_native) - -ENTRY(tramp_exit_compat) - tramp_exit 32 -END(tramp_exit_compat) - - .ltorg - .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE - .pushsection ".rodata", "a" - .align PAGE_SHIFT - .globl __entry_tramp_data_start -__entry_tramp_data_start: - .quad vectors - .popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. - * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) -NOKPROBE(cpu_switch_to) - -/* - * This is how we return from a fork. 
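ret_from_fork below is the textbook fork-return split. copy_thread() seeds the new task's saved x19/x20: x19 holds a kernel-thread entry point (zero for a user fork) and x20 its argument. A sketch of the control flow (the function shape is illustrative):

	void ret_from_fork_sketch(struct task_struct *prev)
	{
		void (*fn)(void *) = saved_x19;	/* NULL for a user-space fork */

		schedule_tail(prev);		/* finish the context switch */
		if (fn)
			fn(saved_x20);		/* kernel thread: usually never returns */
		ret_to_user();			/* head back to EL0 */
	}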
- */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_current_task tsk - b ret_to_user -ENDPROC(ret_from_fork) -NOKPROBE(ret_from_fork) - -#ifdef CONFIG_ARM_SDE_INTERFACE - -#include -#include - -.macro sdei_handler_exit exit_mode - /* On success, this call never returns... */ - cmp \exit_mode, #SDEI_EXIT_SMC - b.ne 99f - smc #0 - b . -99: hvc #0 - b . -.endm - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * The regular SDEI entry point may have been unmapped along with the rest of - * the kernel. This trampoline restores the kernel mapping to make the x1 memory - * argument accessible. - * - * This clobbers x4, __sdei_handler() will restore this from firmware's - * copy. - */ -.ltorg -.pushsection ".entry.tramp.text", "ax" -ENTRY(__sdei_asm_entry_trampoline) - mrs x4, ttbr1_el1 - tbz x4, #USER_ASID_BIT, 1f - - tramp_map_kernel tmp=x4 - isb - mov x4, xzr - - /* - * Use reg->interrupted_regs.addr_limit to remember whether to unmap - * the kernel on exit. - */ -1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] - -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif - br x4 -ENDPROC(__sdei_asm_entry_trampoline) -NOKPROBE(__sdei_asm_entry_trampoline) - -/* - * Make the exit call and restore the original ttbr1_el1 - * - * x0 & x1: setup for the exit API call - * x2: exit_mode - * x4: struct sdei_registered_event argument from registration time. - */ -ENTRY(__sdei_asm_exit_trampoline) - ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] - cbnz x4, 1f - - tramp_unmap_kernel tmp=x4 - -1: sdei_handler_exit exit_mode=x2 -ENDPROC(__sdei_asm_exit_trampoline) -NOKPROBE(__sdei_asm_exit_trampoline) - .ltorg -.popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -__sdei_asm_trampoline_next_handler: - .quad __sdei_asm_handler -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ - -/* - * Software Delegated Exception entry point. - * - * x0: Event number - * x1: struct sdei_registered_event argument from registration time. - * x2: interrupted PC - * x3: interrupted PSTATE - * x4: maybe clobbered by the trampoline - * - * Firmware has preserved x0->x17 for us, we must save/restore the rest to - * follow SMC-CC. We save (or retrieve) all the registers as the handler may - * want them. - */ -ENTRY(__sdei_asm_handler) - stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC] - stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2] - stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3] - stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4] - stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5] - stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6] - stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7] - stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8] - stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9] - stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10] - stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11] - stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12] - stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13] - stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14] - mov x4, sp - stp lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR] - - mov x19, x1 - -#ifdef CONFIG_VMAP_STACK - /* - * entry.S may have been using sp as a scratch register, find whether - * this is a normal or critical event and switch to the appropriate - * stack for this CPU. 
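The stack pick in the CONFIG_VMAP_STACK block below reduces to a priority test: critical events get their own per-CPU stack so they can safely preempt a normal SDEI handler. Pseudo-C, with per-CPU accessors named loosely:

	unsigned long base = event->priority
			   ? this_cpu_read(sdei_stack_critical_ptr)
			   : this_cpu_read(sdei_stack_normal_ptr);
	set_sp(base + SDEI_STACK_SIZE);		/* stacks grow downwards */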
- */ - ldrb w4, [x19, #SDEI_EVENT_PRIORITY] - cbnz w4, 1f - ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 - b 2f -1: ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6 -2: mov x6, #SDEI_STACK_SIZE - add x5, x5, x6 - mov sp, x5 -#endif - - /* - * We may have interrupted userspace, or a guest, or exit-from or - * return-to either of these. We can't trust sp_el0, restore it. - */ - mrs x28, sp_el0 - ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1 - msr sp_el0, x0 - - /* If we interrupted the kernel point to the previous stack/frame. */ - and x0, x3, #0xc - mrs x1, CurrentEL - cmp x0, x1 - csel x29, x29, xzr, eq // fp, or zero - csel x4, x2, xzr, eq // elr, or zero - - stp x29, x4, [sp, #-16]! - mov x29, sp - - add x0, x19, #SDEI_EVENT_INTREGS - mov x1, x19 - bl __sdei_handler - - msr sp_el0, x28 - /* restore regs >x17 that we clobbered */ - mov x4, x19 // keep x4 for __sdei_asm_exit_trampoline - ldp x28, x29, [x4, #SDEI_EVENT_INTREGS + 16 * 14] - ldp x18, x19, [x4, #SDEI_EVENT_INTREGS + 16 * 9] - ldp lr, x1, [x4, #SDEI_EVENT_INTREGS + S_LR] - mov sp, x1 - - mov x1, x0 // address to complete_and_resume - /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */ - cmp x0, #1 - mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE - mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME - csel x0, x2, x3, ls - - ldr_l x2, sdei_exit_mode - -alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 - sdei_handler_exit exit_mode=x2 -alternative_else_nop_endif - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline - br x5 -#endif -ENDPROC(__sdei_asm_handler) -NOKPROBE(__sdei_asm_handler) -#endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S deleted file mode 100644 index a2e0b37549433b8629183efb810c8513f1a21669..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/head.S +++ /dev/null @@ -1,987 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Low-level CPU initialisation - * Based on arch/arm/kernel/head.S - * - * Copyright (C) 1994-2002 Russell King - * Copyright (C) 2003-2012 ARM Ltd. - * Authors: Catalin Marinas - * Will Deacon - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "efi-header.S" - -#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) - -#if (TEXT_OFFSET & 0xfff) != 0 -#error TEXT_OFFSET must be at least 4KB aligned -#elif (PAGE_OFFSET & 0x1fffff) != 0 -#error PAGE_OFFSET must be at least 2MB aligned -#elif TEXT_OFFSET > 0x1fffff -#error TEXT_OFFSET must be less than 2MB -#endif - -/* - * Kernel startup entry point. - * --------------------------- - * - * The requirements are: - * MMU = off, D-cache = off, I-cache = on or off, - * x0 = physical address to the FDT blob. - * - * This code is mostly position independent so you call this at - * __pa(PAGE_OFFSET + TEXT_OFFSET). - * - * Note that the callee-saved registers are used for storing variables - * that are useful before the MMU is enabled. The allocations are described - * in the entry routines. - */ - __HEAD -_head: - /* - * DO NOT MODIFY. Image header expected by Linux boot-loaders. - */ -#ifdef CONFIG_EFI - /* - * This add instruction has no meaningful effect except that - * its opcode forms the magic "MZ" signature required by UEFI. 
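- * Concretely, "add x13, x18, #0x16" assembles to 0x91005a4d, which is
- * stored little-endian as the bytes 4d 5a 00 91, and 0x4d 0x5a is ASCII
- * "MZ", the DOS/PE signature UEFI firmware expects at offset 0 of the
- * image. The add itself is harmless: x13 is never read afterwards.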
- */ - add x13, x18, #0x16 - b stext -#else - b stext // branch to kernel start, magic - .long 0 // reserved -#endif - le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian - le64sym _kernel_size_le // Effective size of kernel image, little-endian - le64sym _kernel_flags_le // Informative flags, little-endian - .quad 0 // reserved - .quad 0 // reserved - .quad 0 // reserved - .ascii ARM64_IMAGE_MAGIC // Magic number -#ifdef CONFIG_EFI - .long pe_header - _head // Offset to the PE header. - -pe_header: - __EFI_PE_HEADER -#else - .long 0 // reserved -#endif - - __INIT - - /* - * The following callee saved general purpose registers are used on the - * primary lowlevel boot path: - * - * Register Scope Purpose - * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 - * x23 stext() .. start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() - * current RELR displacement - */ -ENTRY(stext) - bl preserve_boot_args - bl el2_setup // Drop to EL1, w0=cpu_boot_mode - adrp x23, __PHYS_OFFSET - and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 - bl set_cpu_boot_mode_flag - bl __create_page_tables - /* - * The following calls CPU setup code, see arch/arm64/mm/proc.S for - * details. - * On return, the CPU will be ready for the MMU to be turned on and - * the TCR will have been set. - */ - bl __cpu_setup // initialise processor - b __primary_switch -ENDPROC(stext) - -/* - * Preserve the arguments passed by the bootloader in x0 .. x3 - */ -preserve_boot_args: - mov x21, x0 // x21=FDT - - adr_l x0, boot_args // record the contents of - stp x21, x1, [x0] // x0 .. x3 at kernel entry - stp x2, x3, [x0, #16] - - dmb sy // needed before dc ivac with - // MMU off - - mov x1, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call -ENDPROC(preserve_boot_args) - -/* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: ptrs, tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - add \tmp1, \tbl, #PAGE_SIZE - phys_to_pte \tmp2, \tmp1 - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - lsr \tmp1, \virt, #\shift - sub \ptrs, \ptrs, #1 - and \tmp1, \tmp1, \ptrs // table index - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate page table entries, these entries can be pointers to the next level - * or last level entries pointing to physical memory. 
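- * In C terms the loop amounts to (illustrative sketch; parameters are
- * described below):
- *
- *	for (; index <= eindex; index++, rtbl += inc)
- *		tbl[index] = phys_to_pte(rtbl) | flags;
- *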
- * - * tbl: page table address - * rtbl: pointer to page table or physical memory - * index: start index to write - * eindex: end index to write - [index, eindex] written to - * flags: flags for pagetable entry to or in - * inc: increment to rtbl between each entry - * tmp1: temporary variable - * - * Preserves: tbl, eindex, flags, inc - * Corrupts: index, tmp1 - * Returns: rtbl - */ - .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 -.Lpe\@: phys_to_pte \tmp1, \rtbl - orr \tmp1, \tmp1, \flags // tmp1 = table entry - str \tmp1, [\tbl, \index, lsl #3] - add \rtbl, \rtbl, \inc // rtbl = pa next level - add \index, \index, #1 - cmp \index, \eindex - b.ls .Lpe\@ - .endm - -/* - * Compute indices of table entries from virtual address range. If multiple entries - * were needed in the previous page table level then the next page table level is assumed - * to be composed of multiple pages. (This effectively scales the end index). - * - * vstart: virtual address of start of range - * vend: virtual address of end of range - * shift: shift used to transform virtual address into index - * ptrs: number of entries in page table - * istart: index in table corresponding to vstart - * iend: index in table corresponding to vend - * count: On entry: how many extra entries were required in previous level, scales - * our end index. - * On exit: returns how many extra entries required for next page table level - * - * Preserves: vstart, vend, shift, ptrs - * Returns: istart, iend, count - */ - .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count - lsr \iend, \vend, \shift - mov \istart, \ptrs - sub \istart, \istart, #1 - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) - mov \istart, \ptrs - mul \istart, \istart, \count - add \iend, \iend, \istart // iend += (count - 1) * ptrs - // our entries span multiple tables - - lsr \istart, \vstart, \shift - mov \count, \ptrs - sub \count, \count, #1 - and \istart, \istart, \count - - sub \count, \iend, \istart - .endm - -/* - * Map memory for specified virtual address range. Each level of page table needed supports - * multiple entries. If a level requires n entries the next page table level is assumed to be - * formed from n pages. 
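- * Conceptually (pseudo-C, one pass per level, top level first; the
- * last level writes block/page entries with the caller's flags):
- *
- *	count = 0;
- *	for_each_level(pgd ... last) {
- *		compute_indices(vstart, vend, shift, ptrs, &istart, &iend, &count);
- *		populate_entries(tbl, next_tbl, istart, iend, PMD_TYPE_TABLE);
- *		tbl = next_tbl;
- *	}
- *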
- * - * tbl: location of page table - * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) - * vstart: start address to map - * vend: end address to map - we map [vstart, vend] - * flags: flags to use to map last level entries - * phys: physical address corresponding to vstart - physical memory is contiguous - * pgds: the number of pgd entries - * - * Temporaries: istart, iend, tmp, count, sv - these need to be different registers - * Preserves: vstart, vend, flags - * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv - */ - .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv - add \rtbl, \tbl, #PAGE_SIZE - mov \sv, \rtbl - mov \count, #0 - compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv - mov \sv, \rtbl - -#if SWAPPER_PGTABLE_LEVELS > 3 - compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv - mov \sv, \rtbl -#endif - -#if SWAPPER_PGTABLE_LEVELS > 2 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp - mov \tbl, \sv -#endif - - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count - bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 - populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled - */ -__create_page_tables: - mov x28, lr - - /* - * Invalidate the init page tables to avoid potential dirty cache lines - * being evicted. Other page tables are allocated in rodata as part of - * the kernel image, and thus are clean to the PoC per the boot - * protocol. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - /* - * Clear the init page tables. - */ - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - subs x1, x1, #64 - b.ne 1b - - mov x7, SWAPPER_MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - adrp x0, idmap_pg_dir - adrp x3, __idmap_text_start // __pa(__idmap_text_start) - -#ifdef CONFIG_ARM64_VA_BITS_52 - mrs_s x6, SYS_ID_AA64MMFR2_EL1 - and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - mov x5, #52 - cbnz x6, 1f -#endif - mov x5, #VA_BITS_MIN -1: - adr_l x6, vabits_actual - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - - /* - * VA_BITS may be too small to allow for an ID mapping to be created - * that covers system RAM if that is located sufficiently high in the - * physical address space. So for the ID map, use an extended virtual - * range in that case, and configure an additional translation level - * if needed. - * - * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire ID map region can be mapped. As T0SZ == (64 - #bits used), - * this number conveniently equals the number of leading zeroes in - * the physical address of __idmap_text_end. 
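- *
- * For example, if __idmap_text_end sits at physical 0x80_0000_0000
- * (bit 39 set), clz gives 24, so T0SZ = 24 and TTBR0 spans 2^40 bytes,
- * exactly enough to reach that address.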
- */ - adrp x5, __idmap_text_end - clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? - b.ge 1f // .. then skip VA range extension - - adr_l x6, idmap_t0sz - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - -#if (VA_BITS < 48) -#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) - - /* - * If VA_BITS < 48, we have to configure an additional table level. - * First, we have to verify our assumption that the current value of - * VA_BITS was chosen such that all translation levels are fully - * utilised, and that lowering T0SZ will always result in an additional - * translation level to be configured. - */ -#if VA_BITS != EXTRA_SHIFT -#error "Mismatch between VA_BITS and page size/number of translation levels" -#endif - - mov x4, EXTRA_PTRS - create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 -#else - /* - * If VA_BITS == 48, we don't have to configure an additional - * translation level, but the top-level table has more entries. - */ - mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) - str_l x4, idmap_ptrs_per_pgd, x5 -#endif -1: - ldr_l x4, idmap_ptrs_per_pgd - mov x5, x3 // __pa(__idmap_text_start) - adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - adrp x0, init_pg_dir - mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) - add x5, x5, x23 // add KASLR displacement - mov x4, PTRS_PER_PGD - adrp x6, _end // runtime __pa(_end) - adrp x3, _text // runtime __pa(_text) - sub x6, x6, x3 // _end - _text - add x6, x6, x5 // runtime __va(_end) - - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 - - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate those tables again to - * remove any speculatively loaded cache lines. - */ - dmb sy - - adrp x0, idmap_pg_dir - adrp x1, idmap_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - adrp x0, init_pg_dir - adrp x1, init_pg_end - sub x1, x1, x0 - bl __inval_dcache_area - - ret x28 -ENDPROC(__create_page_tables) - .ltorg - -/* - * The following fragment of code is executed with the MMU enabled. - * - * x0 = __PHYS_OFFSET - */ -__primary_switched: - adrp x4, init_thread_union - add sp, x4, #THREAD_SIZE - adr_l x5, init_task - msr sp_el0, x5 // Save thread_info - - adr_l x8, vectors // load VBAR_EL1 with virtual - msr vbar_el1, x8 // vector table address - isb - - stp xzr, x30, [sp, #-16]! - mov x29, sp - - str_l x21, __fdt_pointer, x5 // Save FDT pointer - - ldr_l x4, kimage_vaddr // Save the offset between - sub x4, x4, x0 // the kernel virtual and - str_l x4, kimage_voffset, x5 // physical mappings - - // Clear BSS - adr_l x0, __bss_start - mov x1, xzr - adr_l x2, __bss_stop - sub x2, x2, x0 - bl __pi_memset - dsb ishst // Make zero page visible to PTW - -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif -#ifdef CONFIG_RANDOMIZE_BASE - tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? - b.ne 0f - mov x0, x21 // pass FDT address in x0 - bl kaslr_early_init // parse FDT for KASLR options - cbz x0, 0f // KASLR disabled? 
just proceed - orr x23, x23, x0 // record KASLR offset - ldp x29, x30, [sp], #16 // we must enable KASLR, return - ret // to __primary_switch() -0: -#endif - add sp, sp, #16 - mov x29, #0 - mov x30, #0 - b start_kernel -ENDPROC(__primary_switched) - -/* - * end early head section, begin head code that is also used for - * hotplug and needs to have the same protections as the text region - */ - .section ".idmap.text","awx" - -ENTRY(kimage_vaddr) - .quad _text - TEXT_OFFSET -EXPORT_SYMBOL(kimage_vaddr) - -/* - * If we're fortunate enough to boot at EL2, ensure that the world is - * sane before dropping to EL1. - * - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if - * booted in EL1 or EL2 respectively. - */ -ENTRY(el2_setup) - msr SPsel, #1 // We want to use SP_EL{1,2} - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.eq 1f - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) - msr sctlr_el1, x0 - mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 - isb - ret - -1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) - msr sctlr_el2, x0 - -#ifdef CONFIG_ARM64_VHE - /* - * Check for VHE being present. For the rest of the EL2 setup, - * x2 being non-zero indicates that we do have VHE, and that the - * kernel is intended to run at EL2. - */ - mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr -#endif - - /* Hyp configuration. */ - mov_q x0, HCR_HOST_NVHE_FLAGS - cbz x2, set_hcr - mov_q x0, HCR_HOST_VHE_FLAGS -set_hcr: - msr hcr_el2, x0 - isb - - /* - * Allow Non-secure EL1 and EL0 to access physical timer and counter. - * This is not necessary for VHE, since the host kernel runs in EL2, - * and EL0 accesses are configured in the later stage of boot process. - * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout - * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined - * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 - * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in - * EL2. - */ - cbnz x2, 1f - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers - msr cnthctl_el2, x0 -1: - msr cntvoff_el2, xzr // Clear virtual offset - -#ifdef CONFIG_ARM_GIC_V3 - /* GICv3 system register access */ - mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 3f - - mrs_s x0, SYS_ICC_SRE_EL2 - orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 - orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s SYS_ICC_SRE_EL2, x0 - isb // Make sure SRE is now set - mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 3f // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults - -3: -#endif - - /* Populate ID registers. */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - -#ifdef CONFIG_COMPAT - msr hstr_el2, xzr // Disable CP15 traps to EL2 -#endif - - /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present - mrs x0, pmcr_el0 // Disable debug access traps - ubfx x0, x0, #11, #5 // to EL2 and allow access to -4: - csel x3, xzr, x0, lt // all PMU counters from EL1 - - /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 7f // Skip if SPE not present - cbnz x2, 6f // VHE? 
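- /*
- * In C-like terms the non-VHE path below does roughly:
- *
- *	if (!(read_sysreg(PMBIDR_EL1) & PMBIDR_EL1_P))
- *		write_sysreg(PMSCR_EL2_PCT | PMSCR_EL2_PA, PMSCR_EL2);
- *	mdcr |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
- *
- * i.e. let EL1 own the profiling buffer and, where permitted, allow
- * sampling of physical addresses and the physical counter.
- */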
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x4, 5f // then permit sampling of physical - mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) - msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter -5: - mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - orr x3, x3, x1 // If we don't have VHE, then - b 7f // use EL1&0 translation. -6: // For VHE, use EL2 translation - orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 -7: - msr mdcr_el2, x3 // Configure debug traps - - /* LORegions */ - mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f - msr_s SYS_LORC_EL1, xzr -1: - - /* Stage-2 translation */ - msr vttbr_el2, xzr - - cbz x2, install_el2_stub - - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 - isb - ret - -install_el2_stub: - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 - - /* SVE register access */ - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 7f - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 - isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. - - /* Hypervisor stub */ -7: adr_l x0, __hyp_stub_vectors - msr vbar_el2, x0 - - /* spsr */ - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, x0 - msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 - eret -ENDPROC(el2_setup) - -/* - * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed - * in w0. See arch/arm64/include/asm/virt.h for more info. - */ -set_cpu_boot_mode_flag: - adr_l x1, __boot_cpu_mode - cmp w0, #BOOT_CPU_MODE_EL2 - b.ne 1f - add x1, x1, #4 -1: str w0, [x1] // This CPU has booted in EL1 - dmb sy - dc ivac, x1 // Invalidate potentially stale cache line - ret -ENDPROC(set_cpu_boot_mode_flag) - -/* - * These values are written with the MMU off, but read with the MMU on. - * Writers will invalidate the corresponding address, discarding up to a - * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures - * sufficient alignment that the CWG doesn't overlap another section. - */ - .pushsection ".mmuoff.data.write", "aw" -/* - * We need to find out the CPU boot mode long after boot, so we need to - * store it in a writable variable. - * - * This is not in .bss, because we set it sufficiently early that the boot-time - * zeroing of .bss would clobber it. - */ -ENTRY(__boot_cpu_mode) - .long BOOT_CPU_MODE_EL2 - .long BOOT_CPU_MODE_EL1 -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - */ -ENTRY(__early_cpu_boot_status) - .quad 0 - - .popsection - - /* - * This provides a "holding pen" for platforms to hold all secondary - * cores are held until we're ready for them to initialise. 
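- * That is, each secondary core parks here, spinning on
- * secondary_holding_pen_release until the boot CPU writes this core's
- * MPIDR there; in C-like terms:
- *
- *	while (READ_ONCE(secondary_holding_pen_release) != my_mpidr)
- *		wfe();
- *	secondary_startup();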
- */ -ENTRY(secondary_holding_pen) - bl el2_setup // Drop to EL1, w0=cpu_boot_mode - bl set_cpu_boot_mode_flag - mrs x0, mpidr_el1 - mov_q x1, MPIDR_HWID_BITMASK - and x0, x0, x1 - adr_l x3, secondary_holding_pen_release -pen: ldr x4, [x3] - cmp x4, x0 - b.eq secondary_startup - wfe - b pen -ENDPROC(secondary_holding_pen) - - /* - * Secondary entry point that jumps straight into the kernel. Only to - * be used where CPUs are brought online dynamically by the kernel. - */ -ENTRY(secondary_entry) - bl el2_setup // Drop to EL1 - bl set_cpu_boot_mode_flag - b secondary_startup -ENDPROC(secondary_entry) - -secondary_startup: - /* - * Common entry point for secondary CPUs. - */ - bl __cpu_secondary_check52bitva - bl __cpu_setup // initialise processor - adrp x1, swapper_pg_dir - bl __enable_mmu - ldr x8, =__secondary_switched - br x8 -ENDPROC(secondary_startup) - -__secondary_switched: - adr_l x5, vectors - msr vbar_el1, x5 - isb - - adr_l x0, secondary_data - ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack - cbz x1, __secondary_too_slow - mov sp, x1 - ldr x2, [x0, #CPU_BOOT_TASK] - cbz x2, __secondary_too_slow - msr sp_el0, x2 - mov x29, #0 - mov x30, #0 - b secondary_start_kernel -ENDPROC(__secondary_switched) - -__secondary_too_slow: - wfe - wfi - b __secondary_too_slow -ENDPROC(__secondary_too_slow) - -/* - * The booting CPU updates the failed status @__early_cpu_boot_status, - * with MMU turned off. - * - * update_early_cpu_boot_status tmp, status - * - Corrupts tmp1, tmp2 - * - Writes 'status' to __early_cpu_boot_status and makes sure - * it is committed to memory. - */ - - .macro update_early_cpu_boot_status status, tmp1, tmp2 - mov \tmp2, #\status - adr_l \tmp1, __early_cpu_boot_status - str \tmp2, [\tmp1] - dmb sy - dc ivac, \tmp1 // Invalidate potentially stale cache line - .endm - -/* - * Enable the MMU. - * - * x0 = SCTLR_EL1 value for turning on the MMU. - * x1 = TTBR1_EL1 value - * - * Returns to the caller via x30/lr. This requires the caller to be covered - * by the .idmap.text section. - * - * Checks if the selected granule size is supported by the CPU. - * If it isn't, park the CPU - */ -ENTRY(__enable_mmu) - mrs x2, ID_AA64MMFR0_EL1 - ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 - cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED - b.ne __no_granule_support - update_early_cpu_boot_status 0, x2, x3 - adrp x2, idmap_pg_dir - phys_to_ttbr x1, x1 - phys_to_ttbr x2, x2 - msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1, x3 - msr ttbr1_el1, x1 // load TTBR1 - isb - msr sctlr_el1, x0 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - ret -ENDPROC(__enable_mmu) - -ENTRY(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x0, vabits_actual - cmp x0, #52 - b.ne 2f - - mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) - cbnz x0, 2f - - update_early_cpu_boot_status \ - CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1 -1: wfe - wfi - b 1b - -#endif -2: ret -ENDPROC(__cpu_secondary_check52bitva) - -__no_granule_support: - /* Indicate that this CPU can't boot and is stuck in the kernel */ - update_early_cpu_boot_status \ - CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2 -1: - wfe - wfi - b 1b -ENDPROC(__no_granule_support) - -#ifdef CONFIG_RELOCATABLE -__relocate_kernel: - /* - * Iterate over each entry in the relocation table, and apply the - * relocations in place. 
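- * Each Elf64_Rela entry is 24 bytes { r_offset, r_info, r_addend }; for
- * R_AARCH64_RELATIVE entries the fixup below boils down to (C sketch,
- * with x23 holding the virtual/KASLR displacement):
- *
- *	*(u64 *)(rela->r_offset + x23) = rela->r_addend + x23;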
- */ - ldr w9, =__rela_offset // offset to reloc table - ldr w10, =__rela_size // size of reloc table - - mov_q x11, KIMAGE_VADDR // default virtual offset - add x11, x11, x23 // actual virtual offset - add x9, x9, x11 // __va(.rela) - add x10, x9, x10 // __va(.rela) + sizeof(.rela) - -0: cmp x9, x10 - b.hs 1f - ldp x12, x13, [x9], #24 - ldr x14, [x9, #-8] - cmp w13, #R_AARCH64_RELATIVE - b.ne 0b - add x14, x14, x23 // relocate - str x14, [x12, x23] - b 0b - -1: -#ifdef CONFIG_RELR - /* - * Apply RELR relocations. - * - * RELR is a compressed format for storing relative relocations. The - * encoded sequence of entries looks like: - * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] - * - * i.e. start with an address, followed by any number of bitmaps. The - * address entry encodes 1 relocation. The subsequent bitmap entries - * encode up to 63 relocations each, at subsequent offsets following - * the last address entry. - * - * The bitmap entries must have 1 in the least significant bit. The - * assumption here is that an address cannot have 1 in lsb. Odd - * addresses are not supported. Any odd addresses are stored in the RELA - * section, which is handled above. - * - * Excluding the least significant bit in the bitmap, each non-zero - * bit in the bitmap represents a relocation to be applied to - * a corresponding machine word that follows the base address - * word. The second least significant bit represents the machine - * word immediately following the initial address, and each bit - * that follows represents the next word, in linear order. As such, - * a single bitmap can encode up to 63 relocations in a 64-bit object. - * - * In this implementation we store the address of the next RELR table - * entry in x9, the address being relocated by the current address or - * bitmap entry in x13 and the address being relocated by the current - * bit in x14. - * - * Because addends are stored in place in the binary, RELR relocations - * cannot be applied idempotently. We use x24 to keep track of the - * currently applied displacement so that we can correctly relocate if - * __relocate_kernel is called twice with non-zero displacements (i.e. - * if there is both a physical misalignment and a KASLR displacement). - */ - ldr w9, =__relr_offset // offset to reloc table - ldr w10, =__relr_size // size of reloc table - add x9, x9, x11 // __va(.relr) - add x10, x9, x10 // __va(.relr) + sizeof(.relr) - - sub x15, x23, x24 // delta from previous offset - cbz x15, 7f // nothing to do if unchanged - mov x24, x23 // save new offset - -2: cmp x9, x10 - b.hs 7f - ldr x11, [x9], #8 - tbnz x11, #0, 3f // branch to handle bitmaps - add x13, x11, x23 - ldr x12, [x13] // relocate address entry - add x12, x12, x15 - str x12, [x13], #8 // adjust to start of bitmap - b 2b - -3: mov x14, x13 -4: lsr x11, x11, #1 - cbz x11, 6f - tbz x11, #0, 5f // skip bit if not set - ldr x12, [x14] // relocate bit - add x12, x12, x15 - str x12, [x14] - -5: add x14, x14, #8 // move to next bit's address - b 4b - -6: /* - * Move to the next bitmap's address. 8 is the word size, and 63 is the - * number of significant bits in a bitmap entry. 
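- * (That is, x13 advances by 8 * 63 = 504 bytes, so a following bitmap
- * entry describes the next 63 machine words.) The whole decoder, in
- * C-like form with "delta" the change in displacement since the last
- * pass:
- *
- *	if (!(entry & 1)) {
- *		where = (u64 *)(entry + x23);	// address entry
- *		*where++ += delta;
- *	} else {
- *		for (i = 0; (entry >>= 1); i++)	// bitmap entry
- *			if (entry & 1)
- *				where[i] += delta;
- *		where += 63;
- *	}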
- */ - add x13, x13, #(8 * 63) - b 2b - -7: -#endif - ret - -ENDPROC(__relocate_kernel) -#endif - -__primary_switch: -#ifdef CONFIG_RANDOMIZE_BASE - mov x19, x0 // preserve new SCTLR_EL1 value - mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value -#endif - - adrp x1, init_pg_dir - bl __enable_mmu -#ifdef CONFIG_RELOCATABLE -#ifdef CONFIG_RELR - mov x24, #0 // no RELR displacement yet -#endif - bl __relocate_kernel -#ifdef CONFIG_RANDOMIZE_BASE - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - blr x8 - - /* - * If we return here, we have a KASLR displacement in x23 which we need - * to take into account by discarding the current kernel mapping and - * creating a new one. - */ - pre_disable_mmu_workaround - msr sctlr_el1, x20 // disable the MMU - isb - bl __create_page_tables // recreate kernel mapping - - tlbi vmalle1 // Remove any stale TLB entries - dsb nsh - isb - - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb - - bl __relocate_kernel -#endif -#endif - ldr x8, =__primary_switched - adrp x0, __PHYS_OFFSET - br x8 -ENDPROC(__primary_switch) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S deleted file mode 100644 index 38bcd4d4e43bb1314f103f5ac477e1143c019240..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/hibernate-asm.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hibernate low-level support - * - * Copyright (C) 2016 ARM Ltd. - * Author: James Morse - */ -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - -/* - * Resume from hibernate - * - * Loads temporary page tables then restores the memory image. - * Finally branches to cpu_resume() to restore the state saved by - * swsusp_arch_suspend(). - * - * Because this code has to be copied to a 'safe' page, it can't call out to - * other functions by PC-relative address. Also remember that it may be - * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. - * - * This 'safe' page is mapped via ttbr0, and executed from there. This function - * switches to a copy of the linear map in ttbr1, performs the restore, then - * switches ttbr1 to the original kernel's swapper_pg_dir. - * - * All of memory gets written to, including code. We need to clean the kernel - * text to the Point of Coherence (PoC) before secondary cores can be booted. - * Because the kernel modules and executable pages mapped to user space are - * also written as data, we clean all pages we touch to the Point of - * Unification (PoU). 
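- * In outline, the restore below is:
- *
- *	for (pbe = restore_pblist; pbe; pbe = pbe->next) {
- *		copy_page(pbe->orig_address, pbe->address);
- *		clean_dcache_to_pou(pbe->orig_address, PAGE_SIZE);
- *	}
- *
- * (clean_dcache_to_pou() here is shorthand for the dc cvau loop, not a
- * real kernel helper.)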
- * - * x0: physical address of temporary page tables - * x1: physical address of swapper page tables - * x2: address of cpu_resume - * x3: linear map address of restore_pblist in the current kernel - * x4: physical address of __hyp_stub_vectors, or 0 - * x5: physical address of a zero page that remains zero after resume - */ -.pushsection ".hibernate_exit.text", "ax" -ENTRY(swsusp_arch_suspend_exit) - /* - * We execute from ttbr0, change ttbr1 to our copied linear map tables - * with a break-before-make via the zero page - */ - break_before_make_ttbr_switch x5, x0, x6, x8 - - mov x21, x1 - mov x30, x2 - mov x24, x4 - mov x25, x5 - - /* walk the restore_pblist and use copy_page() to over-write memory */ - mov x19, x3 - -1: ldr x10, [x19, #HIBERN_PBE_ORIG] - mov x0, x10 - ldr x1, [x19, #HIBERN_PBE_ADDR] - - copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 - - add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ - raw_dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ - add x4, x4, x2 - cmp x4, x1 - b.lo 2b - - ldr x19, [x19, #HIBERN_PBE_NEXT] - cbnz x19, 1b - dsb ish /* wait for PoU cleaning to finish */ - - /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21, x6, x8 - - ic ialluis - dsb ish - isb - - cbz x24, 3f /* Do we need to re-initialise EL2? */ - hvc #0 -3: ret - - .ltorg -ENDPROC(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -el1_sync: - msr vbar_el2, x24 - eret -ENDPROC(el1_sync) - -.macro invalid_vector label -\label: - b \label -ENDPROC(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -ENTRY(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -END(hibernate_el2_vectors) - -.popsection diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S deleted file mode 100644 index 73d46070b31500117ade72ea1d2a6c2ed31feb2b..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/hyp-stub.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hypervisor stub - * - * Copyright (C) 2012 ARM Ltd. 
- * Author: Marc Zyngier - */ - -#include -#include -#include - -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - - .align 11 - -ENTRY(__hyp_stub_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_stub_vectors) - - .align 11 - -el1_sync: - cmp x0, #HVC_SET_VECTORS - b.ne 2f - msr vbar_el2, x1 - b 9f - -2: cmp x0, #HVC_SOFT_RESTART - b.ne 3f - mov x0, x2 - mov x2, x4 - mov x4, x1 - mov x1, x3 - br x4 // no return - -3: cmp x0, #HVC_RESET_VECTORS - beq 9f // Nothing to reset! - - /* Someone called kvm_call_hyp() against the hyp-stub... */ - ldr x0, =HVC_STUB_ERR - eret - -9: mov x0, xzr - eret -ENDPROC(el1_sync) - -.macro invalid_vector label -\label: - b \label -ENDPROC(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* - * __hyp_set_vectors: Call this after boot to set the initial hypervisor - * vectors as part of hypervisor installation. On an SMP system, this should - * be called on each CPU. - * - * x0 must be the physical address of the new vector table, and must be - * 2KB aligned. - * - * Before calling this, you must check that the stub hypervisor is installed - * everywhere, by waiting for any secondary CPUs to be brought up and then - * checking that is_hyp_mode_available() is true. - * - * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or - * something else went wrong... in such cases, trying to install a new - * hypervisor is unlikely to work as desired. - * - * When you call into your shiny new hypervisor, sp_el2 will contain junk, - * so you will need to set that to something sensible at the new hypervisor's - * initialisation entry point. - */ - -ENTRY(__hyp_set_vectors) - mov x1, x0 - mov x0, #HVC_SET_VECTORS - hvc #0 - ret -ENDPROC(__hyp_set_vectors) - -ENTRY(__hyp_reset_vectors) - mov x0, #HVC_RESET_VECTORS - hvc #0 - ret -ENDPROC(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S deleted file mode 100644 index 42bd8c0c60e09d66fbd53aa73db820d38d18d1e0..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/kuser32.S +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AArch32 user helpers. - * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. - * - * Copyright (C) 2005-2011 Nicolas Pitre - * Copyright (C) 2012-2018 ARM Ltd. - * - * The kuser helpers below are mapped at a fixed address by - * aarch32_setup_additional_pages() and are provided for compatibility - * reasons with 32 bit (aarch32) applications that need them. - * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. 
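- *
- * The helpers sit at fixed, ABI-stable slots below 0xffff1000:
- * __kuser_cmpxchg64 at 0xffff0f60, __kuser_memory_barrier at 0xffff0fa0,
- * __kuser_cmpxchg at 0xffff0fc0, __kuser_get_tls at 0xffff0fe0, and the
- * version word at 0xffff0ffc. A 32-bit process calls them by casting the
- * address, e.g. (illustrative only):
- *
- *	typedef int (*kuser_cmpxchg_t)(u32 old, u32 new, volatile u32 *ptr);
- *	int ok = ((kuser_cmpxchg_t)0xffff0fc0)(o, n, p) == 0;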
- */ - -#include - - .align 5 - .globl __kuser_helper_start -__kuser_helper_start: - -__kuser_cmpxchg64: // 0xffff0f60 - .inst 0xe92d00f0 // push {r4, r5, r6, r7} - .inst 0xe1c040d0 // ldrd r4, r5, [r0] - .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] - .inst 0xe0303004 // eors r3, r0, r4 - .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23e96 // stlexdeq r3, r6, [r2] - .inst 0x03330001 // teqeq r3, #1 - .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05b // dmb ish - .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05b // dmb ish - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923f9f // 1: ldrex r3, [r2] - .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823e91 // stlexeq r3, r1, [r2] - .inst 0x03330001 // teqeq r3, #1 - .inst 0x0afffffa // beq 1b - .inst 0xf57ff05b // dmb ish - .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xe12fff1e // bx lr - - .align 5 -__kuser_get_tls: // 0xffff0fe0 - .inst 0xee1d0f70 // mrc p15, 0, r0, c13, c0, 3 - .inst 0xe12fff1e // bx lr - .rep 5 - .word 0 - .endr - -__kuser_helper_version: // 0xffff0ffc - .word ((__kuser_helper_end - __kuser_helper_start) >> 5) - .globl __kuser_helper_end -__kuser_helper_end: diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S deleted file mode 100644 index 45dce03aaeafc12ab196d6b444e9331654b7fad2..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * trampoline entry and return code for kretprobes. - */ - -#include -#include -#include - - .text - - .macro save_all_base_regs - stp x0, x1, [sp, #S_X0] - stp x2, x3, [sp, #S_X2] - stp x4, x5, [sp, #S_X4] - stp x6, x7, [sp, #S_X6] - stp x8, x9, [sp, #S_X8] - stp x10, x11, [sp, #S_X10] - stp x12, x13, [sp, #S_X12] - stp x14, x15, [sp, #S_X14] - stp x16, x17, [sp, #S_X16] - stp x18, x19, [sp, #S_X18] - stp x20, x21, [sp, #S_X20] - stp x22, x23, [sp, #S_X22] - stp x24, x25, [sp, #S_X24] - stp x26, x27, [sp, #S_X26] - stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE - stp lr, x0, [sp, #S_LR] - /* - * Construct a useful saved PSTATE - */ - mrs x0, nzcv - mrs x1, daif - orr x0, x0, x1 - mrs x1, CurrentEL - orr x0, x0, x1 - mrs x1, SPSel - orr x0, x0, x1 - stp xzr, x0, [sp, #S_PC] - .endm - - .macro restore_all_base_regs - ldr x0, [sp, #S_PSTATE] - and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) - msr nzcv, x0 - ldp x0, x1, [sp, #S_X0] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldp x8, x9, [sp, #S_X8] - ldp x10, x11, [sp, #S_X10] - ldp x12, x13, [sp, #S_X12] - ldp x14, x15, [sp, #S_X14] - ldp x16, x17, [sp, #S_X16] - ldp x18, x19, [sp, #S_X18] - ldp x20, x21, [sp, #S_X20] - ldp x22, x23, [sp, #S_X22] - ldp x24, x25, [sp, #S_X24] - ldp x26, x27, [sp, #S_X26] - ldp x28, x29, [sp, #S_X28] - .endm - -ENTRY(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE - - save_all_base_regs - - mov x0, sp - bl trampoline_probe_handler - /* - * Replace trampoline address in lr with actual orig_ret_addr return - * address. 
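- * trampoline_probe_handler() hands back the return address that the
- * kretprobe saved at function entry, so the final ret resumes the real
- * caller as though no probe had fired.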
- */ - mov lr, x0 - - restore_all_base_regs - - add sp, sp, #S_FRAME_SIZE - ret - -ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S deleted file mode 100644 index 16a34f188f2672d01298b42cc7385da6c78bc14b..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/reloc_test_syms.S +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 Linaro, Ltd. - */ - -#include - -ENTRY(absolute_data64) - ldr x0, 0f - ret -0: .quad sym64_abs -ENDPROC(absolute_data64) - -ENTRY(absolute_data32) - ldr w0, 0f - ret -0: .long sym32_abs -ENDPROC(absolute_data32) - -ENTRY(absolute_data16) - adr x0, 0f - ldrh w0, [x0] - ret -0: .short sym16_abs, 0 -ENDPROC(absolute_data16) - -ENTRY(signed_movw) - movz x0, #:abs_g2_s:sym64_abs - movk x0, #:abs_g1_nc:sym64_abs - movk x0, #:abs_g0_nc:sym64_abs - ret -ENDPROC(signed_movw) - -ENTRY(unsigned_movw) - movz x0, #:abs_g3:sym64_abs - movk x0, #:abs_g2_nc:sym64_abs - movk x0, #:abs_g1_nc:sym64_abs - movk x0, #:abs_g0_nc:sym64_abs - ret -ENDPROC(unsigned_movw) - - .align 12 - .space 0xff8 -ENTRY(relative_adrp) - adrp x0, sym64_rel - add x0, x0, #:lo12:sym64_rel - ret -ENDPROC(relative_adrp) - - .align 12 - .space 0xffc -ENTRY(relative_adrp_far) - adrp x0, memstart_addr - add x0, x0, #:lo12:memstart_addr - ret -ENDPROC(relative_adrp_far) - -ENTRY(relative_adr) - adr x0, sym64_rel - ret -ENDPROC(relative_adr) - -ENTRY(relative_data64) - adr x1, 0f - ldr x0, [x1] - add x0, x0, x1 - ret -0: .quad sym64_rel - . -ENDPROC(relative_data64) - -ENTRY(relative_data32) - adr x1, 0f - ldr w0, [x1] - add x0, x0, x1 - ret -0: .long sym64_rel - . -ENDPROC(relative_data32) - -ENTRY(relative_data16) - adr x1, 0f - ldrsh w0, [x1] - add x0, x0, x1 - ret -0: .short sym64_rel - ., 0 -ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S deleted file mode 100644 index c1d7db71a7269c622c11f181ec5929a5fa351a7f..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/relocate_kernel.S +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * kexec for arm64 - * - * Copyright (C) Linaro. - * Copyright (C) Huawei Futurewei Technologies. - */ - -#include -#include - -#include -#include -#include -#include - -/* - * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. - * - * The memory that the old kernel occupies may be overwritten when coping the - * new image to its final location. To assure that the - * arm64_relocate_new_kernel routine which does that copy is not overwritten, - * all code and data needed by arm64_relocate_new_kernel must be between the - * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The - * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec - * control_code_page, a special page which has been set up to be preserved - * during the copy operation. - */ -ENTRY(arm64_relocate_new_kernel) - - /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ - raw_dcache_line_size x15, x0 /* x15 = dcache line size */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ - - /* Clear the sctlr_el2 flags. 
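- * If we entered at EL2, the boot protocol requires the MMU and caches
- * to be off before branching to the new image, so the SCTLR_ELx_FLAGS
- * bits (MMU, alignment checks, D/I-cache) are masked out first.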
*/ - mrs x0, CurrentEL - cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 - ldr x1, =SCTLR_ELx_FLAGS - bic x0, x0, x1 - pre_disable_mmu_workaround - msr sctlr_el2, x0 - isb -1: - - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone - -.Lloop: - and x12, x16, PAGE_MASK /* x12 = addr */ - - /* Test the entry flags. */ -.Ltest_source: - tbz x16, IND_SOURCE_BIT, .Ltest_indirection - - /* Invalidate dest page to PoC. */ - mov x0, x13 - add x20, x0, #PAGE_SIZE - sub x1, x15, #1 - bic x0, x0, x1 -2: dc ivac, x0 - add x0, x0, x15 - cmp x0, x20 - b.lo 2b - dsb sy - - mov x20, x13 - mov x21, x12 - copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 - - /* dest += PAGE_SIZE */ - add x13, x13, PAGE_SIZE - b .Lnext - -.Ltest_indirection: - tbz x16, IND_INDIRECTION_BIT, .Ltest_destination - - /* ptr = addr */ - mov x14, x12 - b .Lnext - -.Ltest_destination: - tbz x16, IND_DESTINATION_BIT, .Lnext - - /* dest = addr */ - mov x13, x12 - -.Lnext: - /* entry = *ptr++ */ - ldr x16, [x14], #8 - - /* while (!(entry & DONE)) */ - tbz x16, IND_DONE_BIT, .Lloop - -.Ldone: - /* wait for writes from copy_page to finish */ - dsb nsh - ic iallu - dsb nsh - isb - - /* Start new image. */ - mov x0, x18 - mov x1, xzr - mov x2, xzr - mov x3, xzr - br x17 - -ENDPROC(arm64_relocate_new_kernel) - -.ltorg - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S deleted file mode 100644 index 475d30d471ac1634364bab74e7f3d58c0dfc1fb6..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/sigreturn32.S +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * AArch32 sigreturn code. - * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. - * - * Copyright (C) 2005-2011 Nicolas Pitre - * Copyright (C) 2012-2018 ARM Ltd. - * - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - * - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. 
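- *
- * The sequences below are emitted with .byte so that the syscall number
- * lands in each instruction's immediate field: in A32, "mov r7, #imm"
- * (e3a070xx) and "svc #imm" (efxxxxxx) keep it in the low byte; in
- * Thumb, "mov r7, #imm8" encodes as (imm8, 0x27) and "svc #imm8" as
- * (imm8, 0xdf).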
- */ - -#include - - .globl __aarch32_sigret_code_start -__aarch32_sigret_code_start: - - /* - * ARM Code - */ - .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn - - /* - * ARM code - */ - .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn - - .globl __aarch32_sigret_code_end -__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S deleted file mode 100644 index f5b04dd8a7107275b7cd145a9c29c38a8dd98cf4..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/sleep.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include - - .text -/* - * Implementation of MPIDR_EL1 hash algorithm through shifting - * and OR'ing. - * - * @dst: register containing hash result - * @rs0: register containing affinity level 0 bit shift - * @rs1: register containing affinity level 1 bit shift - * @rs2: register containing affinity level 2 bit shift - * @rs3: register containing affinity level 3 bit shift - * @mpidr: register containing MPIDR_EL1 value - * @mask: register containing MPIDR mask - * - * Pseudo C-code: - * - *u32 dst; - * - *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { - * u32 aff0, aff1, aff2, aff3; - * u64 mpidr_masked = mpidr & mask; - * aff0 = mpidr_masked & 0xff; - * aff1 = mpidr_masked & 0xff00; - * aff2 = mpidr_masked & 0xff0000; - * aff3 = mpidr_masked & 0xff00000000; - * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); - *} - * Input registers: rs0, rs1, rs2, rs3, mpidr, mask - * Output register: dst - * Note: input and output registers must be disjoint register sets - (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) - */ - .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask - and \mpidr, \mpidr, \mask // mask out MPIDR bits - and \dst, \mpidr, #0xff // mask=aff0 - lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 - and \mask, \mpidr, #0xff00 // mask = aff1 - lsr \mask ,\mask, \rs1 - orr \dst, \dst, \mask // dst|=(aff1>>rs1) - and \mask, \mpidr, #0xff0000 // mask = aff2 - lsr \mask ,\mask, \rs2 - orr \dst, \dst, \mask // dst|=(aff2>>rs2) - and \mask, \mpidr, #0xff00000000 // mask = aff3 - lsr \mask ,\mask, \rs3 - orr \dst, \dst, \mask // dst|=(aff3>>rs3) - .endm -/* - * Save CPU state in the provided sleep_stack_data area, and publish its - * location for cpu_resume()'s use in sleep_save_stash. - * - * cpu_resume() will restore this saved state, and return. Because the - * link-register is saved and restored, it will appear to return from this - * function. So that the caller can tell the suspend/resume paths apart, - * __cpu_suspend_enter() will always return a non-zero value, whereas the - * path through cpu_resume() will return 0. 
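- * A caller can therefore distinguish the two paths (sketch; cpu_off()
- * stands in for the actual firmware call):
- *
- *	if (__cpu_suspend_enter(&state))
- *		cpu_off();	// suspend: never returns on success
- *	else
- *		return 0;	// woke up again through cpu_resume()
- *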
- * - * x0 = struct sleep_stack_data area - */ -ENTRY(__cpu_suspend_enter) - stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] - stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] - stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] - stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] - stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] - stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] - - /* save the sp in cpu_suspend_ctx */ - mov x2, sp - str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] - - /* find the mpidr_hash */ - ldr_l x1, sleep_save_stash - mrs x7, mpidr_el1 - adr_l x9, mpidr_hash - ldr x10, [x9, #MPIDR_HASH_MASK] - /* - * Following code relies on the struct mpidr_hash - * members size. - */ - ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] - ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] - compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 - add x1, x1, x8, lsl #3 - - str x0, [x1] - add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS - stp x29, lr, [sp, #-16]! - bl cpu_do_suspend - ldp x29, lr, [sp], #16 - mov x0, #1 - ret -ENDPROC(__cpu_suspend_enter) - - .pushsection ".idmap.text", "awx" -ENTRY(cpu_resume) - bl el2_setup // if in EL2 drop to EL1 cleanly - bl __cpu_setup - /* enable the MMU early - so we can access sleep_save_stash by va */ - adrp x1, swapper_pg_dir - bl __enable_mmu - ldr x8, =_cpu_resume - br x8 -ENDPROC(cpu_resume) - .ltorg - .popsection - -ENTRY(_cpu_resume) - mrs x1, mpidr_el1 - adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address - - /* retrieve mpidr_hash members to compute the hash */ - ldr x2, [x8, #MPIDR_HASH_MASK] - ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] - ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] - compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 - - /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_stash - ldr x0, [x0, x7, lsl #3] - add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS - add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS - /* load sp from context */ - ldr x2, [x0, #CPU_CTX_SP] - mov sp, x2 - /* - * cpu_do_resume expects x0 to contain context address pointer - */ - bl cpu_do_resume - -#ifdef CONFIG_KASAN - mov x0, sp - bl kasan_unpoison_task_stack_below -#endif - - ldp x19, x20, [x29, #16] - ldp x21, x22, [x29, #32] - ldp x23, x24, [x29, #48] - ldp x25, x26, [x29, #64] - ldp x27, x28, [x29, #80] - ldp x29, lr, [x29] - mov x0, #0 - ret -ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S deleted file mode 100644 index 54655273d1e0ba9e619953e9ce4d3bbfd73426da..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/smccc-call.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2015, Linaro Limited - */ -#include -#include - -#include -#include - - .macro SMCCC instr - .cfi_startproc - \instr #0 - ldr x4, [sp] - stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] - stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ldr x4, [sp, #8] - cbz x4, 1f /* no quirk structure */ - ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] - cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 - b.ne 1f - str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] -1: ret - .cfi_endproc - .endm - -/* - * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, - * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_smc) - SMCCC smc -ENDPROC(__arm_smccc_smc) -EXPORT_SYMBOL(__arm_smccc_smc) - -/* - * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, - * 
unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, - * struct arm_smccc_quirk *quirk) - */ -ENTRY(__arm_smccc_hvc) - SMCCC hvc -ENDPROC(__arm_smccc_hvc) -EXPORT_SYMBOL(__arm_smccc_hvc) diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh old mode 100755 new mode 100644 diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S deleted file mode 100644 index 0ce6ec75a525298b44de658c265f4762ecf4dd00..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/note.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include -#include -#include -#include - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END - -BUILD_SALT diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S deleted file mode 100644 index 0723aa398d6eeea4a5baa58c17c6027fc8faea86..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Sigreturn trampoline for returning from a signal when the SA_RESTORER - * flag is not set. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include - - .text - - nop -ENTRY(__kernel_rt_sigreturn) - .cfi_startproc - .cfi_signal_frame - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 - mov x8, #__NR_rt_sigreturn - svc #0 - .cfi_endproc -ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S deleted file mode 100644 index d1414fee5274b7fbe76933672aeb99eb3884df0d..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/vdso.S +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include -#include -#include -#include - - .globl vdso_start, vdso_end - .section .rodata - .balign PAGE_SIZE -vdso_start: - .incbin "arch/arm64/kernel/vdso/vdso.so" - .balign PAGE_SIZE -vdso_end: - - .previous diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S deleted file mode 100644 index 815df253f96e055a3ee584a7364503d02d698ed2..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * GNU linker script for the VDSO library. -* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - * Heavily based on the vDSO linker scripts for other archs. - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") -OUTPUT_ARCH(aarch64) - -SECTIONS -{ - PROVIDE(_vdso_data = . - PAGE_SIZE); - . = VDSO_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - /* - * Discard .note.gnu.property sections which are unused and have - * different alignment requirement from vDSO note sections. 
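- * (GNU property notes are 8-byte aligned while the vDSO's other notes
- * are 4-byte aligned; merging both into one PT_NOTE segment would
- * confuse note parsers.)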
- */ - /DISCARD/ : { - *(.note.GNU-stack .note.gnu.property) - } - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - - .text : { *(.text*) } :text =0xd503201f - PROVIDE (__etext = .); - PROVIDE (_etext = .); - PROVIDE (etext = .); - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - _end = .; - PROVIDE(end = .); - - /DISCARD/ : { - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.6.39 { - global: - __kernel_rt_sigreturn; - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - local: *; - }; -} - -/* - * Make the sigreturn code visible to the kernel. - */ -VDSO_sigtramp = __kernel_rt_sigreturn; diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S deleted file mode 100644 index 1a81277c2d09a7798397d40185693e7df13e91fd..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file provides both A32 and T32 versions, in accordance with the - * arm sigreturn code. - * - * Copyright (C) 2018 ARM Limited - */ - -#include -#include -#include - -#define ARM_ENTRY(name) \ - ENTRY(name) - -#define ARM_ENDPROC(name) \ - .type name, %function; \ - END(name) - - .text - - .arm - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_sigreturn_arm) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_sigreturn_arm) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_rt_sigreturn_arm) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_rt_sigreturn_arm) - - .thumb - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_sigreturn_thumb) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_sigreturn_thumb) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -ARM_ENTRY(__kernel_rt_sigreturn_thumb) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -ARM_ENDPROC(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S deleted file mode 100644 index e72ac7bc4c04f483f38e588c3098c53c96531a8d..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/vdso.S +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 ARM Limited - */ - -#include -#include -#include -#include - - .globl vdso32_start, vdso32_end - .section .rodata - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/arm64/kernel/vdso32/vdso.so" - .balign PAGE_SIZE -vdso32_end: - - .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S deleted file mode 100644 index a3944927eaeb49cc29f07e327fdf5f0e40de1cfe..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ 
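The 64-bit version script above is what makes this vDSO usable: the four __kernel_* symbols exported under LINUX_2.6.39 are the only ones a C library may bind to. Nothing special is needed in userspace; a libc that locates the vDSO (via the AT_SYSINFO_EHDR auxv entry) routes the plain POSIX call through __kernel_clock_gettime without entering the kernel:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            /* fast path: no syscall when the vDSO services this clock */
            if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
                    return 1;

            printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }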
-/* - * Adapted from arm64 version. - * - * GNU linker script for the VDSO library. - * Heavily based on the vDSO linker scripts for other archs. - * - * Copyright (C) 2012-2018 ARM Limited - */ - -#include -#include -#include - -OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") -OUTPUT_ARCH(arm) - -SECTIONS -{ - PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); - . = VDSO_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - - .text : { *(.text*) } :text =0xe7f001f2 - - .got : { *(.got) } - .rel.plt : { *(.rel.plt) } - - /DISCARD/ : { - *(.note.GNU-stack) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ -} - -VERSION -{ - LINUX_2.6 { - global: - __vdso_clock_gettime; - __vdso_gettimeofday; - __vdso_clock_getres; - __kernel_sigreturn_arm; - __kernel_sigreturn_thumb; - __kernel_rt_sigreturn_arm; - __kernel_rt_sigreturn_thumb; - __vdso_clock_gettime64; - local: *; - }; -} - -/* - * Make the sigreturn code visible to the kernel. - */ -VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; -VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; -VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; -VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S deleted file mode 100644 index 0bab37b1acbe98b857d56166ba497c0507c7c537..0000000000000000000000000000000000000000 --- a/arch/arm64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,287 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ld script to make ARM Linux kernel - * taken from the i386 version by Russell King - * Written by Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "image.h" - -/* .exit.text needed in case of alternative patching */ -#define ARM_EXIT_KEEP(x) x -#define ARM_EXIT_DISCARD(x) - -OUTPUT_ARCH(aarch64) -ENTRY(_text) - -jiffies = jiffies_64; - - -#define HYPERVISOR_EXTABLE \ - . = ALIGN(SZ_8); \ - __start___kvm_ex_table = .; \ - *(__kvm_ex_table) \ - __stop___kvm_ex_table = .; - -#define HYPERVISOR_TEXT \ - /* \ - * Align to 4 KB so that \ - * a) the HYP vector table is at its minimum \ - * alignment of 2048 bytes \ - * b) the HYP init code will not cross a page \ - * boundary if its size does not exceed \ - * 4 KB (see related ASSERT() below) \ - */ \ - . = ALIGN(SZ_4K); \ - __hyp_idmap_text_start = .; \ - *(.hyp.idmap.text) \ - __hyp_idmap_text_end = .; \ - __hyp_text_start = .; \ - *(.hyp.text) \ - HYPERVISOR_EXTABLE \ - __hyp_text_end = .; - -#define IDMAP_TEXT \ - . = ALIGN(SZ_4K); \ - __idmap_text_start = .; \ - *(.idmap.text) \ - __idmap_text_end = .; - -#ifdef CONFIG_HIBERNATION -#define HIBERNATE_TEXT \ - . 
= ALIGN(SZ_4K); \ - __hibernate_exit_text_start = .; \ - *(.hibernate_exit.text) \ - __hibernate_exit_text_end = .; -#else -#define HIBERNATE_TEXT -#endif - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define TRAMP_TEXT \ - . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_start = .; \ - *(.entry.tramp.text) \ - . = ALIGN(PAGE_SIZE); \ - __entry_tramp_text_end = .; -#else -#define TRAMP_TEXT -#endif - -/* - * The size of the PE/COFF section that covers the kernel image, which - * runs from stext to _edata, must be a round multiple of the PE/COFF - * FileAlignment, which we set to its minimum value of 0x200. 'stext' - * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned - * boundary should be sufficient. - */ -PECOFF_FILE_ALIGNMENT = 0x200; - -#ifdef CONFIG_EFI -#define PECOFF_EDATA_PADDING \ - .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } -#else -#define PECOFF_EDATA_PADDING -#endif - -SECTIONS -{ - /* - * XXX: The linker does not define how output sections are - * assigned to input sections when there are multiple statements - * matching the same input section name. There is no documented - * order of matching. - */ - /DISCARD/ : { - ARM_EXIT_DISCARD(EXIT_TEXT) - ARM_EXIT_DISCARD(EXIT_DATA) - EXIT_CALL - *(.discard) - *(.discard.*) - *(.interp .dynamic) - *(.dynsym .dynstr .hash .gnu.hash) - *(.eh_frame) - } - - . = KIMAGE_VADDR + TEXT_OFFSET; - - .head.text : { - _text = .; - HEAD_TEXT - } - .text : { /* Real text segment */ - _stext = .; /* Text and read-only data */ - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - ENTRY_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - HYPERVISOR_TEXT - IDMAP_TEXT - HIBERNATE_TEXT - TRAMP_TEXT - *(.fixup) - *(.gnu.warning) - . = ALIGN(16); - *(.got) /* Global offset table */ - } - - . = ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text section */ - - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ - NOTES - - . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - idmap_pg_end = .; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += PAGE_SIZE; - swapper_pg_end = .; - - . = ALIGN(SEGMENT_ALIGN); - __init_begin = .; - __inittext_begin = .; - - INIT_TEXT_SECTION(8) - - __exittext_begin = .; - .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) - } - __exittext_end = .; - - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - - . = ALIGN(PAGE_SIZE); - __inittext_end = .; - __initdata_begin = .; - - .init.data : { - INIT_DATA - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - INIT_RAM_FS - *(.init.rodata.* .init.bss) /* from the EFI stub */ - } - .exit.data : { - ARM_EXIT_KEEP(EXIT_DATA) - } - - PERCPU_SECTION(L1_CACHE_BYTES) - - .rela.dyn : ALIGN(8) { - *(.rela .rela*) - } - - __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); - __rela_size = SIZEOF(.rela.dyn); - -#ifdef CONFIG_RELR - .relr.dyn : ALIGN(8) { - *(.relr.dyn) - } - - __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); - __relr_size = SIZEOF(.relr.dyn); -#endif - - . 
= ALIGN(SEGMENT_ALIGN); - __initdata_end = .; - __init_end = .; - - _data = .; - _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) - - /* - * Data written with the MMU off but read with the MMU on requires - * cache lines to be invalidated, discarding up to a Cache Writeback - * Granule (CWG) of data from the cache. Keep the section that - * requires this type of maintenance to be in its own Cache Writeback - * Granule (CWG) area so the cache maintenance operations don't - * interfere with adjacent data. - */ - .mmuoff.data.write : ALIGN(SZ_2K) { - __mmuoff_data_start = .; - *(.mmuoff.data.write) - } - . = ALIGN(SZ_2K); - .mmuoff.data.read : { - *(.mmuoff.data.read) - __mmuoff_data_end = .; - } - - PECOFF_EDATA_PADDING - __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); - _edata = .; - - BSS_SECTION(0, 0, 0) - - . = ALIGN(PAGE_SIZE); - init_pg_dir = .; - . += INIT_DIR_SIZE; - init_pg_end = .; - - __pecoff_data_size = ABSOLUTE(. - __initdata_begin); - _end = .; - - STABS_DEBUG - - HEAD_SYMBOLS -} - -#include "image-vars.h" - -/* - * The HYP init code and ID map text can't be longer than a page each, - * and should not cross a page boundary. - */ -ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, - "HYP init code too big or misaligned") -ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, - "ID map text too big or misaligned") -#ifdef CONFIG_HIBERNATION -ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) - <= SZ_4K, "Hibernate exit text too big or misaligned") -#endif -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, - "Entry trampoline text too big") -#endif -/* - * If padding is applied before .head.text, virt<->phys conversions will fail. - */ -ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S deleted file mode 100644 index dc41b505507d7afb7aca2b2235361dd54ddcef3f..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp-init.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.idmap.text, "ax" - - .align 11 - -ENTRY(__kvm_hyp_init) - ventry __invalid // Synchronous EL2t - ventry __invalid // IRQ EL2t - ventry __invalid // FIQ EL2t - ventry __invalid // Error EL2t - - ventry __invalid // Synchronous EL2h - ventry __invalid // IRQ EL2h - ventry __invalid // FIQ EL2h - ventry __invalid // Error EL2h - - ventry __do_hyp_init // Synchronous 64-bit EL1 - ventry __invalid // IRQ 64-bit EL1 - ventry __invalid // FIQ 64-bit EL1 - ventry __invalid // Error 64-bit EL1 - - ventry __invalid // Synchronous 32-bit EL1 - ventry __invalid // IRQ 32-bit EL1 - ventry __invalid // FIQ 32-bit EL1 - ventry __invalid // Error 32-bit EL1 - -__invalid: - b . 
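The __kvm_hyp_init vector table just laid out follows the fixed architectural shape: sixteen slots of 0x80 bytes each, in four groups (current EL with SP_EL0, current EL with SP_ELx, lower EL AArch64, lower EL AArch32), with the table 2 KB aligned, which is what the ".align 11" above provides. A small C sketch of the slot arithmetic, with names invented for illustration:

    /* fixed by the AArch64 exception model */
    enum vec_kind  { V_SYNC, V_IRQ, V_FIQ, V_SERROR };
    enum vec_group { G_EL2T, G_EL2H, G_LOWER_64, G_LOWER_32 };

    #define VENTRY_OFFSET(group, kind)  (((group) * 4 + (kind)) * 0x80)

    /* e.g. the sync-from-64-bit-EL1 slot, the one holding __do_hyp_init:
     * VENTRY_OFFSET(G_LOWER_64, V_SYNC) == 0x400 */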
- - /* - * x0: HYP pgd - * x1: HYP stack - * x2: HYP vectors - * x3: per-CPU offset - */ -__do_hyp_init: - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.lo __kvm_handle_stub_hvc - - phys_to_ttbr x4, x0 -alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT -alternative_else_nop_endif - msr ttbr0_el2, x4 - - mrs x4, tcr_el1 - ldr x5, =TCR_EL2_MASK - and x4, x4, x5 - mov x5, #TCR_EL2_RES1 - orr x4, x4, x5 - - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x5, idmap_t0sz - bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH - - /* - * Set the PS bits in TCR_EL2. - */ - tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 - - msr tcr_el2, x4 - - mrs x4, mair_el1 - msr mair_el2, x4 - isb - - /* Invalidate the stale TLBs from Bootloader */ - tlbi alle2 - dsb sy - - /* - * Preserve all the RES1 bits while setting the default flags, - * as well as the EE bit on BE. Drop the A flag since the compiler - * is allowed to generate unaligned accesses. - */ - ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A)) -CPU_BE( orr x4, x4, #SCTLR_ELx_EE) - msr sctlr_el2, x4 - isb - - /* Set the stack and new vectors */ - kern_hyp_va x1 - mov sp, x1 - msr vbar_el2, x2 - - /* Set tpidr_el2 for use by HYP */ - msr tpidr_el2, x3 - - /* Hello, World! */ - eret -ENDPROC(__kvm_hyp_init) - -ENTRY(__kvm_handle_stub_hvc) - cmp x0, #HVC_SOFT_RESTART - b.ne 1f - - /* This is where we're about to jump, staying at EL2 */ - msr elr_el2, x1 - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) - msr spsr_el2, x0 - - /* Shuffle the arguments, and don't come back */ - mov x0, x2 - mov x1, x3 - mov x2, x4 - b reset - -1: cmp x0, #HVC_RESET_VECTORS - b.ne 1f - - /* - * Set the HVC_RESET_VECTORS return code before entering the common - * path so that we do not clobber x0-x2 in case we are coming via - * HVC_SOFT_RESTART. - */ - mov x0, xzr -reset: - /* Reset kvm back to the hyp stub. */ - mrs x5, sctlr_el2 - ldr x6, =SCTLR_ELx_FLAGS - bic x5, x5, x6 // Clear SCTL_M and etc - pre_disable_mmu_workaround - msr sctlr_el2, x5 - isb - - /* Install stub vectors */ - adr_l x5, __hyp_stub_vectors - msr vbar_el2, x5 - eret - -1: /* Bad stub call */ - ldr x0, =HVC_STUB_ERR - eret - -ENDPROC(__kvm_handle_stub_hvc) - - .ltorg - - .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S deleted file mode 100644 index c0094d520dffedf74cd8df54aeea713bcdaf6d1d..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp.S +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include - -/* - * u64 __kvm_call_hyp(void *hypfn, ...); - * - * This is not really a variadic function in the classic C-way and care must - * be taken when calling this to ensure parameters are passed in registers - * only, since the stack will change between the caller and the callee. 
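In other words, __kvm_call_hyp is a register-passthrough trampoline: the HVC traps to EL2, where the hyp vectors recover the function pointer from x0 and branch to it with the remaining argument registers intact. A hedged caller sketch; kvm_ksym_ref() and __kvm_vcpu_run are real names from this kernel generation, but the wrapper below is invented purely for illustration:

    struct kvm_vcpu;

    /* declaration, as in asm/kvm_asm.h */
    u64 __kvm_call_hyp(void *hypfn, ...);

    static u64 run_vcpu_at_el2(struct kvm_vcpu *vcpu)
    {
            /* hypfn travels in x0; do_el2_call shuffles 'vcpu' into x0 */
            return __kvm_call_hyp(kvm_ksym_ref(__kvm_vcpu_run), vcpu);
    }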
- * - * Call the function with the first argument containing a pointer to the - * function you wish to call in Hyp mode, and subsequent arguments will be - * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the - * function pointer can be passed). The function being called must be mapped - * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in x0. - * - * A function pointer with a value less than 0xfff has a special meaning, - * and is used to implement hyp stubs in the same way as in - * arch/arm64/kernel/hyp_stub.S. - */ -ENTRY(__kvm_call_hyp) - hvc #0 - ret -ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S deleted file mode 100644 index dc3d7bc2292fd08dab40737454b89eba7fcd78da..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp/entry.S +++ /dev/null @@ -1,198 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015 - ARM Ltd - * Author: Marc Zyngier - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - -.macro save_callee_saved_regs ctxt - stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -.macro restore_callee_saved_regs ctxt - ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] - ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] - ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] - ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] - ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] - ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] -.endm - -/* - * u64 __guest_enter(struct kvm_vcpu *vcpu, - * struct kvm_cpu_context *host_ctxt); - */ -ENTRY(__guest_enter) - // x0: vcpu - // x1: host context - // x2-x17: clobbered by macros - // x18: guest context - - // Store the host regs - save_callee_saved_regs x1 - - // Now the host state is stored if we have a pending RAS SError it must - // affect the host. If any asynchronous exception is pending we defer - // the guest entry. The DSB isn't necessary before v8.2 as any SError - // would be fatal. -alternative_if ARM64_HAS_RAS_EXTN - dsb nshst - isb -alternative_else_nop_endif - mrs x1, isr_el1 - cbz x1, 1f - mov x0, #ARM_EXCEPTION_IRQ - ret - -1: - add x18, x0, #VCPU_CONTEXT - - // Macro ptrauth_switch_to_guest format: - // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) - // The below macro to restore guest keys is not implemented in C code - // as it may cause Pointer Authentication key signing mismatch errors - // when this feature is enabled for kernel code. - ptrauth_switch_to_guest x18, x0, x1, x2 - - // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] - - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] - - // Do not touch any register after this! 
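CPU_XREG_OFFSET(n) in the entry code below resolves to the byte offset of general-purpose register n inside struct kvm_cpu_context, which is why plain stp/ldp pairs can walk the save area directly. A sketch of the equivalent offsetof(), assuming the v5.4-era layout in which the context embeds struct kvm_regs and its struct user_pt_regs regs[31] array:

    #include <linux/stddef.h>

    /* mirrors CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8 * n) */
    #define XREG_OFFSET(n) \
            offsetof(struct kvm_cpu_context, gp_regs.regs.regs[n])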
- eret - sb -ENDPROC(__guest_enter) - -ENTRY(__guest_exit) - // x0: return code - // x1: vcpu - // x2-x29,lr: vcpu regs - // vcpu x0-x1 on the stack - - add x1, x1, #VCPU_CONTEXT - - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - - // Store the guest regs x2 and x3 - stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] - - // Retrieve the guest regs x0-x1 from the stack - ldp x2, x3, [sp], #16 // x0, x1 - - // Store the guest regs x0-x1 and x4-x18 - stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] - stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] - stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] - stp x8, x9, [x1, #CPU_XREG_OFFSET(8)] - stp x10, x11, [x1, #CPU_XREG_OFFSET(10)] - stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] - stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] - stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - - // Store the guest regs x19-x29, lr - save_callee_saved_regs x1 - - get_host_ctxt x2, x3 - - // Macro ptrauth_switch_to_guest format: - // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) - // The below macro to save/restore keys is not implemented in C code - // as it may cause Pointer Authentication key signing mismatch errors - // when this feature is enabled for kernel code. - ptrauth_switch_to_host x1, x2, x3, x4, x5 - - // Now restore the host regs - restore_callee_saved_regs x2 - -alternative_if ARM64_HAS_RAS_EXTN - // If we have the RAS extensions we can consume a pending error - // without an unmask-SError and isb. The ESB-instruction consumed any - // pending guest error when we took the exception from the guest. - mrs_s x2, SYS_DISR_EL1 - str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] - cbz x2, 1f - msr_s SYS_DISR_EL1, xzr - orr x0, x0, #(1< - */ - -#include - -#include - - .text - .pushsection .hyp.text, "ax" - -ENTRY(__fpsimd_save_state) - fpsimd_save x0, 1 - ret -ENDPROC(__fpsimd_save_state) - -ENTRY(__fpsimd_restore_state) - fpsimd_restore x0, 1 - ret -ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S deleted file mode 100644 index f36aad0f207bb582206e89cbe75c914df8c80032..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ /dev/null @@ -1,350 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -.macro save_caller_saved_regs_vect - /* x0 and x1 were saved in the vector entry */ - stp x2, x3, [sp, #-16]! - stp x4, x5, [sp, #-16]! - stp x6, x7, [sp, #-16]! - stp x8, x9, [sp, #-16]! - stp x10, x11, [sp, #-16]! - stp x12, x13, [sp, #-16]! - stp x14, x15, [sp, #-16]! - stp x16, x17, [sp, #-16]! -.endm - -.macro restore_caller_saved_regs_vect - ldp x16, x17, [sp], #16 - ldp x14, x15, [sp], #16 - ldp x12, x13, [sp], #16 - ldp x10, x11, [sp], #16 - ldp x8, x9, [sp], #16 - ldp x6, x7, [sp], #16 - ldp x4, x5, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x0, x1, [sp], #16 -.endm - - .text - .pushsection .hyp.text, "ax" - -.macro do_el2_call - /* - * Shuffle the parameters before calling the function - * pointed to in x0. Assumes parameters in x[1,2,3]. - */ - str lr, [sp, #-16]! 
- mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - ldr lr, [sp], #16 -.endm - -el1_sync: // Guest trapped into EL2 - - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - cmp x0, #ESR_ELx_EC_HVC64 - ccmp x0, #ESR_ELx_EC_HVC32, #4, ne - b.ne el1_trap - - mrs x1, vttbr_el2 // If vttbr is valid, the guest - cbnz x1, el1_hvc_guest // called HVC - - /* Here, we're pretty sure the host called HVC. */ - ldp x0, x1, [sp], #16 - - /* Check for a stub HVC call */ - cmp x0, #HVC_STUB_HCALL_NR - b.hs 1f - - /* - * Compute the idmap address of __kvm_handle_stub_hvc and - * jump there. Since we use kimage_voffset, do not use the - * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead - * (by loading it from the constant pool). - * - * Preserve x0-x4, which may contain stub parameters. - */ - ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 - br x5 - -1: - /* - * Perform the EL2 call - */ - kern_hyp_va x0 - do_el2_call - - eret - sb - -el1_hvc_guest: - /* - * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1. - * The workaround has already been applied on the host, - * so let's quickly get back to the guest. We don't bother - * restoring x1, as it can be clobbered anyway. - */ - ldr x1, [sp] // Guest's x0 - eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 - cbz w1, wa_epilogue - - /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ - eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ - ARM_SMCCC_ARCH_WORKAROUND_2) - cbnz w1, el1_trap - -#ifdef CONFIG_ARM64_SSBD -alternative_cb arm64_enable_wa2_handling - b wa2_end -alternative_cb_end - get_vcpu_ptr x2, x0 - ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] - - // Sanitize the argument and update the guest flags - ldr x1, [sp, #8] // Guest's x1 - clz w1, w1 // Murphy's device: - lsr w1, w1, #5 // w1 = !!w1 without using - eor w1, w1, #1 // the flags... - bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 - str x0, [x2, #VCPU_WORKAROUND_FLAGS] - - /* Check that we actually need to perform the call */ - hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 - cbz x0, wa2_end - - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 - smc #0 - - /* Don't leak data from the SMC call */ - mov x3, xzr -wa2_end: - mov x2, xzr - mov x1, xzr -#endif - -wa_epilogue: - mov x0, xzr - add sp, sp, #16 - eret - sb - -el1_trap: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_TRAP - b __guest_exit - -el1_irq: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_IRQ - b __guest_exit - -el1_error: - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_EL1_SERROR - b __guest_exit - -el2_sync: - /* Check for illegal exception return */ - mrs x0, spsr_el2 - tbnz x0, #20, 1f - - save_caller_saved_regs_vect - stp x29, x30, [sp, #-16]! - bl kvm_unexpected_el2_exception - ldp x29, x30, [sp], #16 - restore_caller_saved_regs_vect - - eret - -1: - /* Let's attempt a recovery from the illegal exception return */ - get_vcpu_ptr x1, x0 - mov x0, #ARM_EXCEPTION_IL - b __guest_exit - - -el2_error: - save_caller_saved_regs_vect - stp x29, x30, [sp, #-16]! 
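The three instructions commented "Murphy's device" above normalise w1 to exactly 0 or 1 without touching the flags, which the surrounding workaround code relies on: CLZ returns 32 only for a zero 32-bit input, so shifting the leading-zero count right by 5 yields 1 for zero and 0 otherwise, and the final EOR inverts that into !!w1. The same computation in C:

    static inline unsigned int normalise_bool(unsigned int x)
    {
            /* clz w1, w1 ; lsr w1, w1, #5 ; eor w1, w1, #1 */
            unsigned int lz = x ? __builtin_clz(x) : 32; /* CLZ(0) == 32 */

            return (lz >> 5) ^ 1;   /* == !!x */
    }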
- - bl kvm_unexpected_el2_exception - - ldp x29, x30, [sp], #16 - restore_caller_saved_regs_vect - - eret - sb - -ENTRY(__hyp_do_panic) - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - sb -ENDPROC(__hyp_do_panic) - -ENTRY(__hyp_panic) - get_host_ctxt x0, x1 - b hyp_panic -ENDPROC(__hyp_panic) - -.macro invalid_vector label, target = __hyp_panic - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid - invalid_vector el2t_irq_invalid - invalid_vector el2t_fiq_invalid - invalid_vector el2t_error_invalid - invalid_vector el2h_sync_invalid - invalid_vector el2h_irq_invalid - invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid - - .ltorg - - .align 11 - -.macro check_preamble_length start, end -/* kvm_patch_vector_branch() generates code that jumps over the preamble. */ -.if ((\end-\start) != KVM_VECTOR_PREAMBLE) - .error "KVM vector preamble length mismatch" -.endif -.endm - -.macro valid_vect target - .align 7 -661: - esb - stp x0, x1, [sp, #-16]! -662: - b \target - -check_preamble_length 661b, 662b -.endm - -.macro invalid_vect target - .align 7 -661: - b \target - nop -662: - ldp x0, x1, [sp], #16 - b \target - -check_preamble_length 661b, 662b -.endm - -ENTRY(__kvm_hyp_vector) - invalid_vect el2t_sync_invalid // Synchronous EL2t - invalid_vect el2t_irq_invalid // IRQ EL2t - invalid_vect el2t_fiq_invalid // FIQ EL2t - invalid_vect el2t_error_invalid // Error EL2t - - valid_vect el2_sync // Synchronous EL2h - invalid_vect el2h_irq_invalid // IRQ EL2h - invalid_vect el2h_fiq_invalid // FIQ EL2h - valid_vect el2_error // Error EL2h - - valid_vect el1_sync // Synchronous 64-bit EL1 - valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 - valid_vect el1_error // Error 64-bit EL1 - - valid_vect el1_sync // Synchronous 32-bit EL1 - valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 - valid_vect el1_error // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - -#ifdef CONFIG_KVM_INDIRECT_VECTORS -.macro hyp_ventry - .align 7 -1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch - stp x0, x1, [sp, #-16]! 
- b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) - nop - nop - nop -alternative_cb_end -.endm - -.macro generate_vectors -0: - .rept 16 - hyp_ventry - .endr - .org 0b + SZ_2K // Safety measure -.endm - - .align 11 -ENTRY(__bp_harden_hyp_vecs_start) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr -ENTRY(__bp_harden_hyp_vecs_end) - - .popsection - -ENTRY(__smccc_workaround_1_smc_start) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -ENTRY(__smccc_workaround_1_smc_end) -#endif diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S deleted file mode 100644 index 073acbf02a7c842520eeb42df0df4f2dd3a15480..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/clear_page.S +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include - -/* - * Clear page @dest - * - * Parameters: - * x0 - dest - */ -SYM_FUNC_START(clear_page) - mrs x1, dczid_el0 - and w1, w1, #0xf - mov x2, #4 - lsl x1, x2, x1 - -1: dc zva, x0 - add x0, x0, x1 - tst x0, #(PAGE_SIZE - 1) - b.ne 1b - ret -SYM_FUNC_END(clear_page) -EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S deleted file mode 100644 index 48a3a26eff663589a18c3944eff17d1240f66822..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/clear_user.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/clear_user.S - * - * Copyright (C) 2012 ARM Ltd. - */ -#include - -#include -#include - - .text - -/* Prototype: int __arch_clear_user(void *addr, size_t sz) - * Purpose : clear some user memory - * Params : addr - user memory address to clear - * : sz - number of bytes to clear - * Returns : number of bytes NOT cleared - * - * Alignment fixed up by hardware. - */ -SYM_FUNC_START(__arch_clear_user) - mov x2, x1 // save the size for fixup return - subs x1, x1, #8 - b.mi 2f -1: -uao_user_alternative 9f, str, sttr, xzr, x0, 8 - subs x1, x1, #8 - b.pl 1b -2: adds x1, x1, #4 - b.mi 3f -uao_user_alternative 9f, str, sttr, wzr, x0, 4 - sub x1, x1, #4 -3: adds x1, x1, #2 - b.mi 4f -uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 - sub x1, x1, #2 -4: adds x1, x1, #1 - b.mi 5f -uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 -5: mov x0, #0 - ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - - .section .fixup,"ax" - .align 2 -9: mov x0, x2 // return the original size - ret - .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S deleted file mode 100644 index 8e25e89ad01fd7daa41065bae7e1c9745dce986b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_from_user.S +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. 
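clear_page() earlier in this hunk sizes its DC ZVA loop from DCZID_EL0, whose low four bits encode log2 of the zeroing block size in 4-byte words; the byte size is therefore 4 << DCZID_EL0[3:0], 64 bytes on most implementations. DCZID_EL0 is one of the few system registers readable at EL0, so the computation can be shown directly:

    static inline unsigned long dc_zva_block_size(void)
    {
            unsigned long dczid;

            asm volatile("mrs %0, dczid_el0" : "=r" (dczid));

            /* mirrors: and w1, w1, #0xf ; mov x2, #4 ; lsl x1, x2, x1 */
            return 4UL << (dczid & 0xf);
    }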
- */ - -#include - -#include -#include -#include - -/* - * Copy from user space to a kernel buffer (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val - .endm - - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val - .endm - - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val - .endm - - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val - .endm - - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val - .endm - - .macro str1 ptr, regB, val - str \ptr, [\regB], \val - .endm - - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val - .endm - - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val - .endm - -end .req x5 -SYM_FUNC_START(__arch_copy_from_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 // Nothing to copy - ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S deleted file mode 100644 index 667139013ed171ef4b5de1ba916941060858475c..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_in_user.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copy from user space to user space - * - * Copyright (C) 2012 ARM Ltd. - */ - -#include - -#include -#include -#include - -/* - * Copy from user space to user space (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val - .endm - - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val - .endm - - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val - .endm - - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val - .endm - - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val - .endm - - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val - .endm - - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val - .endm - - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val - .endm - -end .req x5 - -SYM_FUNC_START(__arch_copy_in_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_in_user) -EXPORT_SYMBOL(__arch_copy_in_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S deleted file mode 100644 index e125a84eb40009c371275b9be7320e690b92d7a8..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_page.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include -#include -#include - -/* - * Copy a page from src to dest (both are page aligned) - * - * Parameters: - * x0 - dest - * x1 - src - */ -SYM_FUNC_START(copy_page) -alternative_if ARM64_HAS_NO_HW_PREFETCH - // Prefetch three cache lines ahead. 
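Note the error convention shared by __arch_clear_user above and the copy routines here: on a fault, the .fixup handler returns the number of bytes *not* processed (end - dst), never a negative errno. Callers therefore treat any non-zero return as a fault, as in this standard pattern:

    static long fetch_from_user(void *kbuf, const void __user *ubuf,
                                size_t len)
    {
            /* non-zero return == bytes left uncopied == fault */
            if (copy_from_user(kbuf, ubuf, len))
                    return -EFAULT;

            return 0;
    }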
- prfm pldl1strm, [x1, #128] - prfm pldl1strm, [x1, #256] - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - - ldp x2, x3, [x1] - ldp x4, x5, [x1, #16] - ldp x6, x7, [x1, #32] - ldp x8, x9, [x1, #48] - ldp x10, x11, [x1, #64] - ldp x12, x13, [x1, #80] - ldp x14, x15, [x1, #96] - ldp x16, x17, [x1, #112] - - mov x18, #(PAGE_SIZE - 128) - add x1, x1, #128 -1: - subs x18, x18, #128 - -alternative_if ARM64_HAS_NO_HW_PREFETCH - prfm pldl1strm, [x1, #384] -alternative_else_nop_endif - - stnp x2, x3, [x0] - ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] - ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] - ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] - ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] - ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] - ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] - ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] - ldp x16, x17, [x1, #112] - - add x0, x0, #128 - add x1, x1, #128 - - b.gt 1b - - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] - - ret -SYM_FUNC_END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S deleted file mode 100644 index 488df234c49a2483d0151a463d745af8dfe70e2a..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_template.S +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - - -/* - * Copy a buffer from src to dest (alignment handled by the hardware) - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -dst .req x6 - -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 - - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 - - neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwriting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb1 tmp1w, src, #1 - strb1 tmp1w, dst, #1 -1: - tbz tmp2, #1, 2f - ldrh1 tmp1w, src, #2 - strh1 tmp1w, dst, #2 -2: - tbz tmp2, #2, 3f - ldr1 tmp1w, src, #4 - str1 tmp1w, dst, #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr1 tmp1, src, #8 - str1 tmp1, dst, #8 - -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. 
- */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -1: - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -2: - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr1 tmp1, src, #8 - str1 tmp1, dst, #8 -1: - tbz count, #2, 2f - ldr1 tmp1w, src, #4 - str1 tmp1w, dst, #4 -2: - tbz count, #1, 3f - ldrh1 tmp1w, src, #2 - strh1 tmp1w, dst, #2 -3: - tbz count, #0, .Lexitfunc - ldrb1 tmp1w, src, #1 - strb1 tmp1w, dst, #1 - - b .Lexitfunc - -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp1 A_l, A_h, src, #16 - stp1 A_l, A_h, dst, #16 - ldp1 B_l, B_h, src, #16 - ldp1 C_l, C_h, src, #16 - stp1 B_l, B_h, dst, #16 - stp1 C_l, C_h, dst, #16 - ldp1 D_l, D_h, src, #16 - stp1 D_l, D_h, dst, #16 - - tst count, #0x3f - b.ne .Ltail63 - b .Lexitfunc - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp1 A_l, A_h, src, #16 - ldp1 B_l, B_h, src, #16 - ldp1 C_l, C_h, src, #16 - ldp1 D_l, D_h, src, #16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp1 A_l, A_h, dst, #16 - ldp1 A_l, A_h, src, #16 - stp1 B_l, B_h, dst, #16 - ldp1 B_l, B_h, src, #16 - stp1 C_l, C_h, dst, #16 - ldp1 C_l, C_h, src, #16 - stp1 D_l, D_h, dst, #16 - ldp1 D_l, D_h, src, #16 - subs count, count, #64 - b.ge 1b - stp1 A_l, A_h, dst, #16 - stp1 B_l, B_h, dst, #16 - stp1 C_l, C_h, dst, #16 - stp1 D_l, D_h, dst, #16 - - tst count, #0x3f - b.ne .Ltail63 -.Lexitfunc: diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S deleted file mode 100644 index 1a104d0089f3a4036574bb6d1c5ac9796740dc5b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/copy_to_user.S +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Ltd. 
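The .Ltail63/.Ltiny15 sequence just shown decodes the residual length straight from its bits: count & 0x30 selects zero to three 16-byte moves (the fall-through between labels 1: and 2: is deliberate), and bits 3..0 drive the final 8/4/2/1-byte moves. The same shape in C, as a sketch for n < 64:

    #include <linux/string.h>
    #include <linux/types.h>

    static void copy_tail(u8 *dst, const u8 *src, size_t n)
    {
            size_t chunk = n & 0x30;        /* 0, 16, 32 or 48 bytes */

            for (; chunk; chunk -= 16, dst += 16, src += 16)
                    memcpy(dst, src, 16);

            if (n & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
            if (n & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
            if (n & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
            if (n & 1)
                    *dst = *src;
    }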
- */ - -#include - -#include -#include -#include - -/* - * Copy to user space from a kernel buffer (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val - .endm - - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val - .endm - - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val - .endm - - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val - .endm - - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val - .endm - - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val - .endm - - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val - .endm - - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val - .endm - -end .req x5 -SYM_FUNC_START(__arch_copy_to_user) - add end, x0, x2 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - - .section .fixup,"ax" - .align 2 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S deleted file mode 100644 index 243e107e98963b21552ea2cc2ee52e9fc24026e3..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/crc32.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Accelerated CRC32(C) using AArch64 CRC instructions - * - * Copyright (C) 2016 - 2018 Linaro Ltd - */ - -#include -#include -#include - - .cpu generic+crc - - .macro __crc32, c - cmp x2, #16 - b.lt 8f // less than 16 bytes - - and x7, x2, #0x1f - and x2, x2, #~0x1f - cbz x7, 32f // multiple of 32 bytes - - and x8, x7, #0xf - ldp x3, x4, [x1] - add x8, x8, x1 - add x1, x1, x7 - ldp x5, x6, [x8] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) - - tst x7, #8 - crc32\c\()x w8, w0, x3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #4 - lsr x4, x3, #32 - crc32\c\()w w8, w0, w3 - csel x3, x3, x4, eq - csel w0, w0, w8, eq - tst x7, #2 - lsr w4, w3, #16 - crc32\c\()h w8, w0, w3 - csel w3, w3, w4, eq - csel w0, w0, w8, eq - tst x7, #1 - crc32\c\()b w8, w0, w3 - csel w0, w0, w8, eq - tst x7, #16 - crc32\c\()x w8, w0, x5 - crc32\c\()x w8, w8, x6 - csel w0, w0, w8, eq - cbz x2, 0f - -32: ldp x3, x4, [x1], #32 - sub x2, x2, #32 - ldp x5, x6, [x1, #-16] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) - crc32\c\()x w0, w0, x3 - crc32\c\()x w0, w0, x4 - crc32\c\()x w0, w0, x5 - crc32\c\()x w0, w0, x6 - cbnz x2, 32b -0: ret - -8: tbz x2, #3, 4f - ldr x3, [x1], #8 -CPU_BE( rev x3, x3 ) - crc32\c\()x w0, w0, x3 -4: tbz x2, #2, 2f - ldr w3, [x1], #4 -CPU_BE( rev w3, w3 ) - crc32\c\()w w0, w0, w3 -2: tbz x2, #1, 1f - ldrh w3, [x1], #2 -CPU_BE( rev16 w3, w3 ) - crc32\c\()h w0, w0, w3 -1: tbz x2, #0, 0f - ldrb w3, [x1] - crc32\c\()b w0, w0, w3 -0: ret - .endm - - .align 5 -SYM_FUNC_START(crc32_le) -alternative_if_not ARM64_HAS_CRC32 - b crc32_le_base -alternative_else_nop_endif - __crc32 -SYM_FUNC_END(crc32_le) - - .align 5 -SYM_FUNC_START(__crc32c_le) -alternative_if_not ARM64_HAS_CRC32 - b __crc32c_le_base -alternative_else_nop_endif - __crc32 c -SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S deleted file mode 100644 index edf6b970a2774374ad79a828eaefd645cf7d2f1e..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memchr.S +++ 
/dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/memchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find a character in an area of memory. - * - * Parameters: - * x0 - buf - * x1 - c - * x2 - n - * Returns: - * x0 - address of first occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK_PI(memchr) - and w1, w1, #0xff -1: subs x2, x2, #1 - b.mi 2f - ldrb w3, [x0], #1 - cmp w3, w1 - b.ne 1b - sub x0, x0, #1 - ret -2: mov x0, #0 - ret -SYM_FUNC_END_PI(memchr) -EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S deleted file mode 100644 index c0671e793ea9183e5ddc63696d79c8fa2466c2a6..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memcmp.S +++ /dev/null @@ -1,247 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* -* compare memory areas(when two memory areas' offset are different, -* alignment handled by the hardware) -* -* Parameters: -* x0 - const memory area 1 pointer -* x1 - const memory area 2 pointer -* x2 - the maximal compare byte length -* Returns: -* x0 - a compare result, maybe less than, equal to, or greater than ZERO -*/ - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -limit .req x2 -result .req x0 - -/* Internal variables. */ -data1 .req x3 -data1w .req w3 -data2 .req x4 -data2w .req w4 -has_nul .req x5 -diff .req x6 -endloop .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -pos .req x11 -limit_wd .req x12 -mask .req x13 - -SYM_FUNC_START_WEAK_PI(memcmp) - cbz limit, .Lret0 - eor tmp1, src1, src2 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ - /* - * The input source addresses are at alignment boundary. - * Directly compare eight bytes each time. - */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - subs limit_wd, limit_wd, #1 - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, cs /* Last Dword or differences. */ - cbz endloop, .Lloop_aligned - - /* Not reached the limit, must have found a diff. */ - tbz limit_wd, #63, .Lnot_limit - - /* Limit % 8 == 0 => the diff is in the last 8 bytes. */ - ands limit, limit, #7 - b.eq .Lnot_limit - /* - * The remained bytes less than 8. It is needed to extract valid data - * from last eight bytes of the intended memory range. - */ - lsl limit, limit, #3 /* bytes-> bits. */ - mov mask, #~0 -CPU_BE( lsr mask, mask, limit ) -CPU_LE( lsl mask, mask, limit ) - bic data1, data1, mask - bic data2, data2, mask - - orr diff, diff, mask - b .Lnot_limit - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that precede the start point. - */ - bic src1, src1, #7 - bic src2, src2, #7 - ldr data1, [src1], #8 - ldr data2, [src2], #8 - /* - * We can not add limit with alignment offset(tmp1) here. Since the - * addition probably make the limit overflown. 
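The memchr earlier in this hunk is a deliberately simple byte loop: decrement the count, load and compare a byte, and step the pointer back by one on a match, since the post-indexed load has already advanced it. Its direct C equivalent:

    #include <stddef.h>

    void *memchr_sketch(const void *s, int c, size_t n)
    {
            const unsigned char *p = s;

            while (n--) {
                    if (*p == (unsigned char)c)
                            return (void *)p;
                    p++;
            }

            return NULL;    /* the asm returns 0 */
    }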
- */ - sub limit_wd, limit, #1/*limit != 0, so no underflow.*/ - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - add tmp3, tmp3, tmp1 - add limit_wd, limit_wd, tmp3, lsr #3 - add limit, limit, tmp1/* Adjust the limit for the extra. */ - - lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/ - neg tmp1, tmp1/* Bits to alignment -64. */ - mov tmp2, #~0 - /*mask off the non-intended bytes before the start address.*/ -CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) - - orr data1, data1, tmp2 - orr data2, data2, tmp2 - b .Lstart_realigned - - /*src1 and src2 have different alignment offset.*/ -.Lmisaligned8: - cmp limit, #8 - b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/ - - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/ - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/ - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum.*/ - - sub limit, limit, pos - /*compare the proceeding bytes in the first 8 byte segment.*/ -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ - b.eq .Ltinycmp - cbnz pos, 1f /*diff occurred before the last byte.*/ - cmp data1w, data2w - b.eq .Lstart_align -1: - sub result, data1, data2 - ret - -.Lstart_align: - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - - ands xzr, src1, #7 - b.eq .Lrecal_offset - /*process more leading bytes to make src1 aligned...*/ - add src1, src1, tmp3 /*backwards src1 to alignment boundary*/ - add src2, src2, tmp3 - sub limit, limit, tmp3 - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - /*load 8 bytes from aligned SRC1..*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - subs limit_wd, limit_wd, #1 - eor diff, data1, data2 /*Non-zero if differences found.*/ - csinv endloop, diff, xzr, ne - cbnz endloop, .Lunequal_proc - /*How far is the current SRC2 from the alignment boundary...*/ - and tmp3, tmp3, #7 - -.Lrecal_offset:/*src1 is aligned now..*/ - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes and compare from - * the SRC2 alignment boundary. If all 8 bytes are equal,then start - * the second part's comparison. Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - eor diff, data1, data2 /* Non-zero if differences found. */ - cbnz diff, .Lnot_limit - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - eor diff, data1, data2 /* Non-zero if differences found. */ - subs limit_wd, limit_wd, #1 - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - cbz endloop, .Lloopcmp_proc -.Lunequal_proc: - cbz diff, .Lremain8 - -/* There is difference occurred in the latest comparison. */ -.Lnot_limit: -/* -* For little endian,reverse the low significant equal bits into MSB,then -* following CLZ can find how many equal bits exist. -*/ -CPU_LE( rev diff, diff ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - - /* - * The MS-non-zero bit of DIFF marks either the first bit - * that is different, or the end of the significant data. - * Shifting left now will bring the critical information into the - * top bits. 
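The .Lnot_limit epilogue described above reduces the XOR'd difference word to a byte comparison: on little-endian, REV moves the first differing byte to the most significant end, CLZ locates it, and shifting both data words left by that amount and then right by 56 isolates the byte pair for a subtract. Mirrored in C (diff is guaranteed non-zero on this path, so CLZ is well defined):

    #include <linux/types.h>

    static int cmp_le_words(u64 data1, u64 data2)
    {
            u64 d1 = __builtin_bswap64(data1);      /* rev data1 */
            u64 d2 = __builtin_bswap64(data2);      /* rev data2 */
            int pos = __builtin_clzll(d1 ^ d2);     /* clz pos, diff */

            d1 <<= pos;                             /* lsl data1, pos */
            d2 <<= pos;

            /* zero-extend the top bytes, then subtract */
            return (int)(d1 >> 56) - (int)(d2 >> 56);
    }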
- */ - clz pos, diff - lsl data1, data1, pos - lsl data2, data2, pos - /* - * We need to zero-extend (char is unsigned) the value and then - * perform a signed subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret - -.Lremain8: - /* Limit % 8 == 0 =>. all data are equal.*/ - ands limit, limit, #7 - b.eq .Lret0 - -.Ltiny8proc: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ - b.eq .Ltiny8proc - sub result, data1, data2 - ret -.Lret0: - mov result, #0 - ret -SYM_FUNC_END_PI(memcmp) -EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S deleted file mode 100644 index b03cbb3455d4da23413dc91b468efa79396261b7..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memcpy.S +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include -#include - -/* - * Copy a buffer from src to dest (alignment handled by the hardware) - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val - .endm - - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val - .endm - - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val - .endm - - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val - .endm - - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val - .endm - - .macro str1 ptr, regB, val - str \ptr, [\regB], \val - .endm - - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val - .endm - - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val - .endm - -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) -#include "copy_template.S" - ret -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S deleted file mode 100644 index 1035dce4bdaf42572708d94412420ee4c37253fc..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memmove.S +++ /dev/null @@ -1,189 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include -#include - -/* - * Move a buffer from src to test (alignment handled by the hardware). - * If dest <= src, call memcpy, otherwise copy in reverse order. - * - * Parameters: - * x0 - dest - * x1 - src - * x2 - n - * Returns: - * x0 - dest - */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 - -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 - -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_PI(memmove) - cmp dstin, src - b.lo __memcpy - add tmp1, src, count - cmp dstin, tmp1 - b.hs __memcpy /* No overlap. 
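The memmove entry above only pays for a reverse copy when it must: a destination below the source, or at or beyond source + count, cannot be clobbered by a forward copy, so both cases branch straight to __memcpy. The dispatch plus the backwards fallback, sketched in C:

    #include <linux/string.h>
    #include <linux/types.h>

    static void *memmove_sketch(void *dest, const void *src, size_t n)
    {
            u8 *d;
            const u8 *s;

            /* cmp dstin, src ; b.lo __memcpy        (dest below src)
             * cmp dstin, src+count ; b.hs __memcpy  (no overlap)    */
            if ((unsigned long)dest < (unsigned long)src ||
                (unsigned long)dest >= (unsigned long)src + n)
                    return memcpy(dest, src, n);

            /* overlapping with dest above src: copy from the top down */
            d = (u8 *)dest + n;
            s = (const u8 *)src + n;
            while (n--)
                    *--d = *--s;

            return dest;
    }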
*/ - - add dst, dstin, count - add src, src, count - cmp count, #16 - b.lo .Ltail15 /*probably non-alignment accesses.*/ - - ands tmp2, src, #15 /* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * process the aligned offset length to make the src aligned firstly. - * those extra instructions' cost is acceptable. It also make the - * coming accesses are based on aligned address. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src, #-1]! - strb tmp1w, [dst, #-1]! -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! - -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltail15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! -1: - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! -2: - ldp A_l, A_h, [src, #-16]! - stp A_l, A_h, [dst, #-16]! - -.Ltail15: - tbz count, #3, 1f - ldr tmp1, [src, #-8]! - str tmp1, [dst, #-8]! -1: - tbz count, #2, 2f - ldr tmp1w, [src, #-4]! - str tmp1w, [dst, #-4]! -2: - tbz count, #1, 3f - ldrh tmp1w, [src, #-2]! - strh tmp1w, [dst, #-2]! -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src, #-1] - strb tmp1w, [dst, #-1] - -.Lexitfunc: - ret - -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 bytes here and then jump - * to the tail. - */ - ldp A_l, A_h, [src, #-16] - stp A_l, A_h, [dst, #-16] - ldp B_l, B_h, [src, #-32] - ldp C_l, C_h, [src, #-48] - stp B_l, B_h, [dst, #-32] - stp C_l, C_h, [dst, #-48] - ldp D_l, D_h, [src, #-64]! - stp D_l, D_h, [dst, #-64]! - - tst count, #0x3f - b.ne .Ltail63 - ret - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-load 64 bytes data. */ - ldp A_l, A_h, [src, #-16] - ldp B_l, B_h, [src, #-32] - ldp C_l, C_h, [src, #-48] - ldp D_l, D_h, [src, #-64]! -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst, #-16] - ldp A_l, A_h, [src, #-16] - stp B_l, B_h, [dst, #-32] - ldp B_l, B_h, [src, #-32] - stp C_l, C_h, [dst, #-48] - ldp C_l, C_h, [src, #-48] - stp D_l, D_h, [dst, #-64]! - ldp D_l, D_h, [src, #-64]! - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst, #-16] - stp B_l, B_h, [dst, #-32] - stp C_l, C_h, [dst, #-48] - stp D_l, D_h, [dst, #-64]! - - tst count, #0x3f - b.ne .Ltail63 - ret -SYM_FUNC_END_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) -EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S deleted file mode 100644 index a9c1c9a01ea906954953c6dce74d4c3e482328da..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/memset.S +++ /dev/null @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. 
- * - * This code is based on glibc cortex strings work originally authored by Linaro - * and can be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> -#include <asm/cache.h> - -/* - * Fill in the buffer with character c (alignment handled by the hardware) - * - * Parameters: - * x0 - buf - * x1 - c - * x2 - n - * Returns: - * x0 - buf - */ - -dstin .req x0 -val .req w1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -zva_len_x .req x5 -zva_len .req w5 -zva_bits_x .req x6 - -A_l .req x7 -A_lw .req w7 -dst .req x8 -tmp3w .req w9 -tmp3 .req x9 - -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) - mov dst, dstin /* Preserve return value. */ - and A_lw, val, #255 - orr A_lw, A_lw, A_lw, lsl #8 - orr A_lw, A_lw, A_lw, lsl #16 - orr A_l, A_l, A_l, lsl #32 - - cmp count, #15 - b.hi .Lover16_proc - /* All stores may be unaligned. */ - tbz count, #3, 1f - str A_l, [dst], #8 -1: - tbz count, #2, 2f - str A_lw, [dst], #4 -2: - tbz count, #1, 3f - strh A_lw, [dst], #2 -3: - tbz count, #0, 4f - strb A_lw, [dst] -4: - ret - -.Lover16_proc: - /* Check whether the start address is 16-byte aligned. */ - neg tmp2, dst - ands tmp2, tmp2, #15 - b.eq .Laligned -/* -* The count is at least 16, so we can use stp to store the first 16 bytes, -* then advance dst to a 16-byte boundary. Subsequent accesses are then -* based on an aligned address. -*/ - stp A_l, A_l, [dst] /* unaligned store */ - /* make dst aligned */ - sub count, count, tmp2 - add dst, dst, tmp2 - -.Laligned: - cbz A_l, .Lzero_mem - -.Ltail_maybe_long: - cmp count, #64 - b.ge .Lnot_short -.Ltail63: - ands tmp1, count, #0x30 - b.eq 3f - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - stp A_l, A_l, [dst], #16 -1: - stp A_l, A_l, [dst], #16 -2: - stp A_l, A_l, [dst], #16 -/* -* Fewer than 16 bytes remain, so use stp to write the last 16 bytes. -* This writes some bytes twice and the access may be unaligned. -*/ -3: - ands count, count, #15 - cbz count, 4f - add dst, dst, count - stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ -4: - ret - - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line, this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lnot_short: - sub dst, dst, #16 /* Pre-bias. */ - sub count, count, #64 -1: - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - stp A_l, A_l, [dst, #48] - stp A_l, A_l, [dst, #64]! - subs count, count, #64 - b.ge 1b - tst count, #0x3f - add dst, dst, #16 - b.ne .Ltail63 -.Lexitfunc: - ret - - /* - * For zeroing memory, check to see if we can use the ZVA feature to - * zero entire 'cache' lines. - */ -.Lzero_mem: - cmp count, #63 - b.le .Ltail63 - /* - * For zeroing small amounts of memory, it's not worth setting up - * the line-clear code. - */ - cmp count, #128 - b.lt .Lnot_short /* count is at least 128 bytes */ - - mrs tmp1, dczid_el0 - tbnz tmp1, #4, .Lnot_short - mov tmp3w, #4 - and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ - lsl zva_len, tmp3w, zva_len - - ands tmp3w, zva_len, #63 - /* - * Ensure zva_len is at least 64: it is not worthwhile to use ZVA - * if the block size is smaller than 64 bytes. - */ - b.ne .Lnot_short -.Lzero_by_line: - /* - * Compute how far we need to go to become suitably aligned. We're - * already at quad-word alignment. - */ - cmp count, zva_len_x - b.lt .Lnot_short /* Not enough to reach alignment. 
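Two details of the memset setup above are easy to miss: the fill byte is replicated across a 64-bit register with a shift/OR chain, and the DC ZVA block size is decoded from DCZID_EL0. A C sketch of both (splat8 and zva_block_bytes are hypothetical helper names):

#include <stdint.h>

/* Replicate the low byte of c across all eight bytes, mirroring the
 * and/orr/orr/orr sequence at the top of memset. */
static inline uint64_t splat8(uint64_t c)
{
	uint64_t v = c & 0xff;

	v |= v << 8;
	v |= v << 16;
	v |= v << 32;
	return v;
}

/* Decode the DC ZVA block size from DCZID_EL0: 4 << DCZID_EL0[3:0]
 * bytes, usable only when bit 4 (DZP, "prohibited") is clear, which is
 * what the tbnz on bit 4 above checks. */
static inline unsigned int zva_block_bytes(uint64_t dczid_el0)
{
	return 4u << (dczid_el0 & 0xf);
}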
*/ - sub zva_bits_x, zva_len_x, #1 - neg tmp2, dst - ands tmp2, tmp2, zva_bits_x - b.eq 2f /* Already aligned. */ - /* Not aligned, check that there's enough to copy after alignment.*/ - sub tmp1, count, tmp2 - /* - * grantee the remain length to be ZVA is bigger than 64, - * avoid to make the 2f's process over mem range.*/ - cmp tmp1, #64 - ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ - b.lt .Lnot_short - /* - * We know that there's at least 64 bytes to zero and that it's safe - * to overrun by 64 bytes. - */ - mov count, tmp1 -1: - stp A_l, A_l, [dst] - stp A_l, A_l, [dst, #16] - stp A_l, A_l, [dst, #32] - subs tmp2, tmp2, #64 - stp A_l, A_l, [dst, #48] - add dst, dst, #64 - b.ge 1b - /* We've overrun a bit, so adjust dst downwards.*/ - add dst, dst, tmp2 -2: - sub count, count, zva_len_x -3: - dc zva, dst - add dst, dst, zva_len_x - subs count, count, zva_len_x - b.ge 3b - ands count, count, zva_bits_x - b.ne .Ltail_maybe_long - ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) -EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S deleted file mode 100644 index 1f47eae3b0d6d618d24c347db7c2da9ffce98068..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strchr.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/strchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find the first occurrence of a character in a string. - * - * Parameters: - * x0 - str - * x1 - c - * Returns: - * x0 - address of first occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK(strchr) - and w1, w1, #0xff -1: ldrb w2, [x0], #1 - cmp w2, w1 - ccmp w2, wzr, #4, ne - b.ne 1b - sub x0, x0, #1 - cmp w2, w1 - csel x0, x0, xzr, eq - ret -SYM_FUNC_END(strchr) -EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S deleted file mode 100644 index 4767540d1b94ed4bacb2903cbf3904d376c4f5d9..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strcmp.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * compare two strings - * - * Parameters: - * x0 - const string 1 pointer - * x1 - const string 2 pointer - * Returns: - * x0 - an integer less than, equal to, or greater than zero - * if s1 is found, respectively, to be less than, to match, - * or be greater than s2. - */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -result .req x0 - -/* Internal variables. */ -data1 .req x2 -data1w .req w2 -data2 .req x3 -data2w .req w3 -has_nul .req x4 -diff .req x5 -syndrome .req x6 -tmp1 .req x7 -tmp2 .req x8 -tmp3 .req x9 -zeroones .req x10 -pos .req x11 - -SYM_FUNC_START_WEAK_PI(strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. 
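The NUL-detection comment above is the heart of every routine in this family. In C the test it describes looks like the sketch below; the bic form the assembly computes, (x - REP8_01) & ~(x | REP8_7f), is equivalent to the classic & 0x80..80 form:

#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7F 0x7f7f7f7f7f7f7f7fULL

/* Non-zero iff at least one byte of x is zero.  Borrows from the
 * subtraction can spill into bytes above the first zero byte, which is
 * why the big-endian paths below recompute the syndrome on
 * byte-reversed data before trusting individual byte positions. */
static inline uint64_t has_zero_byte(uint64_t x)
{
	return (x - REP8_01) & ~(x | REP8_7F);
}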
- */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - orr syndrome, diff, has_nul - cbz syndrome, .Lloop_aligned - b .Lcal_cmpresult - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that preceed the start point. - */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - - orr data1, data1, tmp2 - orr data2, data2, tmp2 - b .Lstart_realigned - -.Lmisaligned8: - /* - * Get the align offset length to compare per byte first. - * After this process, one string's address will be aligned. - */ - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8 - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8 - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq .Ltinycmp - cbnz pos, 1f /*find the null or unequal...*/ - cmp data1w, #1 - ccmp data1w, data2w, #0, cs - b.eq .Lstart_align /*the last bytes are equal....*/ -1: - sub result, data1, data2 - ret - -.Lstart_align: - ands xzr, src1, #7 - b.eq .Lrecal_offset - /*process more leading bytes to make str1 aligned...*/ - add src1, src1, tmp3 - add src2, src2, tmp3 - /*load 8 bytes from aligned str1 and non-aligned str2..*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbnz syndrome, .Lcal_cmpresult - /*How far is the current str2 from the alignment boundary...*/ - and tmp3, tmp3, #7 -.Lrecal_offset: - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes from the SRC2 alignment - * boundary,then compare with the relative bytes from SRC1. - * If all 8 bytes are equal,then start the second part's comparison. - * Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbnz syndrome, .Lcal_cmpresult - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bic has_nul, tmp1, tmp2 - eor diff, data1, data2 /* Non-zero if differences found. */ - orr syndrome, diff, has_nul - cbz syndrome, .Lloopcmp_proc - -.Lcal_cmpresult: - /* - * reversed the byte-order as big-endian,then CLZ can find the most - * significant zero bits. 
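The .Ltinycmp loop earlier in this function packs three exit conditions (counter exhausted, NUL reached, bytes differ) into a ccmp chain. A rough C rendering of the same loop (tiny_cmp is a hypothetical name, and the early-exit bookkeeping is simplified relative to the assembly):

/* Compare up to pos bytes, advancing both pointers; stop early on a NUL
 * or a mismatch, as the ccmp chain in .Ltinycmp does.  Returns zero if
 * all pos bytes matched and were non-NUL. */
static int tiny_cmp(const unsigned char **s1, const unsigned char **s2,
		    unsigned long pos)
{
	while (pos--) {
		unsigned char c1 = *(*s1)++;
		unsigned char c2 = *(*s2)++;

		if (c1 == '\0' || c1 != c2)
			return (int)c1 - (int)c2;
	}
	return 0;
}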
- */ -CPU_LE( rev syndrome, syndrome ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - - /* - * For big-endian we cannot use the trick with the syndrome value - * as carry-propagation can corrupt the upper bits if the trailing - * bytes in the string contain 0x01. - * However, if there is no NUL byte in the dword, we can generate - * the result directly. We ca not just subtract the bytes as the - * MSB might be significant. - */ -CPU_BE( cbnz has_nul, 1f ) -CPU_BE( cmp data1, data2 ) -CPU_BE( cset result, ne ) -CPU_BE( cneg result, result, lo ) -CPU_BE( ret ) -CPU_BE( 1: ) - /*Re-compute the NUL-byte detection, using a byte-reversed value. */ -CPU_BE( rev tmp3, data1 ) -CPU_BE( sub tmp1, tmp3, zeroones ) -CPU_BE( orr tmp2, tmp3, #REP8_7f ) -CPU_BE( bic has_nul, tmp1, tmp2 ) -CPU_BE( rev has_nul, has_nul ) -CPU_BE( orr syndrome, diff, has_nul ) - - clz pos, syndrome - /* - * The MS-non-zero bit of the syndrome marks either the first bit - * that is different, or the top bit of the first zero byte. - * Shifting left now will bring the critical information into the - * top bits. - */ - lsl data1, data1, pos - lsl data2, data2, pos - /* - * But we need to zero-extend (char is unsigned) the value and then - * perform a signed 32-bit subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S deleted file mode 100644 index ee3ed882dd79fbfd4aecdbd6c4b5e1948603011b..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strlen.S +++ /dev/null @@ -1,115 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * calculate the length of a string - * - * Parameters: - * x0 - const string pointer - * Returns: - * x0 - the return length of specific string - */ - -/* Arguments and results. */ -srcin .req x0 -len .req x0 - -/* Locals and temporaries. */ -src .req x1 -data1 .req x2 -data2 .req x3 -data2a .req x4 -has_nul1 .req x5 -has_nul2 .req x6 -tmp1 .req x7 -tmp2 .req x8 -tmp3 .req x9 -tmp4 .req x10 -zeroones .req x11 -pos .req x12 - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -SYM_FUNC_START_WEAK_PI(strlen) - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ - /* - * The inner loop deals with two Dwords at a time. This has a - * slightly higher start-up cost, but we should win quite quickly, - * especially on cores with a high number of issue slots per - * cycle, as we get much better parallelism out of the operations. 
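On little-endian, the .Lcal_cmpresult sequence in strcmp above byte-reverses the syndrome and both data words so that CLZ locates the first differing or NUL byte, shifts that byte to the top, and compares the two zero-extended bytes. The same computation in C, using GCC/Clang builtins in place of rev and clz (syndrome must be non-zero here, as it is at this point in the assembly):

#include <stdint.h>

static int cal_cmpresult_le(uint64_t syndrome, uint64_t data1, uint64_t data2)
{
	/* rev + clz: bit offset of the first difference or NUL byte. */
	int pos = __builtin_clzll(__builtin_bswap64(syndrome));

	/* Shift the interesting byte to the top of each word... */
	data1 = __builtin_bswap64(data1) << pos;
	data2 = __builtin_bswap64(data2) << pos;

	/* ...then zero-extend (char is unsigned) and subtract. */
	return (int)(data1 >> 56) - (int)(data2 >> 56);
}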
- */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bics has_nul2, tmp3, tmp4 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ - b.eq .Lloop - - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/ - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: - /* - * For big-endian, carry propagation (if the final byte in the - * string is 0x01) means we cannot use has_nul directly. The - * easiest way to get the correct byte is to byte-swap the data - * and calculate the syndrome a second time. - */ -CPU_BE( rev data2, data2 ) -CPU_BE( sub tmp1, data2, zeroones ) -CPU_BE( orr tmp2, data2, #REP8_7f ) -CPU_BE( bic has_nul2, tmp1, tmp2 ) - - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ - ret - -.Lmisaligned: - cmp tmp1, #8 - neg tmp1, tmp1 - ldp data1, data2, [src], #16 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ - - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned -SYM_FUNC_END_PI(strlen) -EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S deleted file mode 100644 index 2a7ee949ed4714fd376a0913e6cc66bced6391e5..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strncmp.S +++ /dev/null @@ -1,299 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * compare two strings - * - * Parameters: - * x0 - const string 1 pointer - * x1 - const string 2 pointer - * x2 - the maximal length to be compared - * Returns: - * x0 - an integer less than, equal to, or greater than zero if s1 is found, - * respectively, to be less than, to match, or be greater than s2. - */ - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -/* Parameters and result. */ -src1 .req x0 -src2 .req x1 -limit .req x2 -result .req x0 - -/* Internal variables. */ -data1 .req x3 -data1w .req w3 -data2 .req x4 -data2w .req w4 -has_nul .req x5 -diff .req x6 -syndrome .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -zeroones .req x11 -pos .req x12 -limit_wd .req x13 -mask .req x14 -endloop .req x15 - -SYM_FUNC_START_WEAK_PI(strncmp) - cbz limit, .Lret0 - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 - b.ne .Lmisaligned8 - ands tmp1, src1, #7 - b.ne .Lmutual_align - /* Calculate the number of full and partial words -1. */ - /* - * when limit is mulitply of 8, if not sub 1, - * the judgement of last dword will wrong. - */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. 
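As the comment above notes, limit_wd is computed as (limit - 1) >> 3 rather than limit >> 3: it must hold the number of whole words minus one, so that the subs in the main loop goes negative exactly on the final word even when limit is a multiple of 8. A one-line C illustration:

#include <stddef.h>

/* limit = 16 bytes is two full words.  (16 - 1) >> 3 == 1, so
 * "subs limit_wd, limit_wd, #1" goes negative on the second word.
 * Using 16 >> 3 == 2 would treat the last word as non-final. */
static size_t limit_words_minus_one(size_t limit)
{
	return (limit - 1) >> 3;	/* caller ensures limit != 0 */
}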
*/ - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ -.Lloop_aligned: - ldr data1, [src1], #8 - ldr data2, [src2], #8 -.Lstart_realigned: - subs limit_wd, limit_wd, #1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences.*/ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp endloop, #0, #0, eq - b.eq .Lloop_aligned - - /*Not reached the limit, must have found the end or a diff. */ - tbz limit_wd, #63, .Lnot_limit - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq .Lnot_limit - - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -CPU_BE( lsr mask, mask, limit ) -CPU_LE( lsl mask, mask, limit ) - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -.Lnot_limit: - orr syndrome, diff, has_nul - b .Lcal_cmpresult - -.Lmutual_align: - /* - * Sources are mutually aligned, but are not currently at an - * alignment boundary. Round down the addresses and then mask off - * the bytes that precede the start point. - * We also need to adjust the limit calculations, but without - * overflowing if the limit is near ULONG_MAX. - */ - bic src1, src1, #7 - bic src2, src2, #7 - ldr data1, [src1], #8 - neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ - ldr data2, [src2], #8 - mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ - - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/ - add limit, limit, tmp1 - add tmp3, tmp3, tmp1 - orr data1, data1, tmp2 - orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 - b .Lstart_realigned - -/*when src1 offset is not equal to src2 offset...*/ -.Lmisaligned8: - cmp limit, #8 - b.lo .Ltiny8proc /*limit < 8... */ - /* - * Get the align offset length to compare per byte first. - * After this process, one string's address will be aligned.*/ - and tmp1, src1, #7 - neg tmp1, tmp1 - add tmp1, tmp1, #8 - and tmp2, src2, #7 - neg tmp2, tmp2 - add tmp2, tmp2, #8 - subs tmp3, tmp1, tmp2 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ - /* - * Here, limit is not less than 8, so directly run .Ltinycmp - * without checking the limit.*/ - sub limit, limit, pos -.Ltinycmp: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs pos, pos, #1 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. 
*/ - b.eq .Ltinycmp - cbnz pos, 1f /*find the null or unequal...*/ - cmp data1w, #1 - ccmp data1w, data2w, #0, cs - b.eq .Lstart_align /*the last bytes are equal....*/ -1: - sub result, data1, data2 - ret - -.Lstart_align: - lsr limit_wd, limit, #3 - cbz limit_wd, .Lremain8 - /*process more leading bytes to make str1 aligned...*/ - ands xzr, src1, #7 - b.eq .Lrecal_offset - add src1, src1, tmp3 /*tmp3 is positive in this branch.*/ - add src2, src2, tmp3 - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub limit, limit, tmp3 - lsr limit_wd, limit, #3 - subs limit_wd, limit_wd, #1 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - bics has_nul, tmp1, tmp2 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ - b.ne .Lunequal_proc - /*How far is the current str2 from the alignment boundary...*/ - and tmp3, tmp3, #7 -.Lrecal_offset: - neg pos, tmp3 -.Lloopcmp_proc: - /* - * Divide the eight bytes into two parts. First,backwards the src2 - * to an alignment boundary,load eight bytes from the SRC2 alignment - * boundary,then compare with the relative bytes from SRC1. - * If all 8 bytes are equal,then start the second part's comparison. - * Otherwise finish the comparison. - * This special handle can garantee all the accesses are in the - * thread/task space in avoid to overrange access. - */ - ldr data1, [src1,pos] - ldr data2, [src2,pos] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, eq - cbnz endloop, .Lunequal_proc - - /*The second part process*/ - ldr data1, [src1], #8 - ldr data2, [src2], #8 - subs limit_wd, limit_wd, #1 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ - bics has_nul, tmp1, tmp2 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ - b.eq .Lloopcmp_proc - -.Lunequal_proc: - orr syndrome, diff, has_nul - cbz syndrome, .Lremain8 -.Lcal_cmpresult: - /* - * reversed the byte-order as big-endian,then CLZ can find the most - * significant zero bits. - */ -CPU_LE( rev syndrome, syndrome ) -CPU_LE( rev data1, data1 ) -CPU_LE( rev data2, data2 ) - /* - * For big-endian we cannot use the trick with the syndrome value - * as carry-propagation can corrupt the upper bits if the trailing - * bytes in the string contain 0x01. - * However, if there is no NUL byte in the dword, we can generate - * the result directly. We can't just subtract the bytes as the - * MSB might be significant. - */ -CPU_BE( cbnz has_nul, 1f ) -CPU_BE( cmp data1, data2 ) -CPU_BE( cset result, ne ) -CPU_BE( cneg result, result, lo ) -CPU_BE( ret ) -CPU_BE( 1: ) - /* Re-compute the NUL-byte detection, using a byte-reversed value.*/ -CPU_BE( rev tmp3, data1 ) -CPU_BE( sub tmp1, tmp3, zeroones ) -CPU_BE( orr tmp2, tmp3, #REP8_7f ) -CPU_BE( bic has_nul, tmp1, tmp2 ) -CPU_BE( rev has_nul, has_nul ) -CPU_BE( orr syndrome, diff, has_nul ) - /* - * The MS-non-zero bit of the syndrome marks either the first bit - * that is different, or the top bit of the first zero byte. - * Shifting left now will bring the critical information into the - * top bits. 
- */ - clz pos, syndrome - lsl data1, data1, pos - lsl data2, data2, pos - /* - * But we need to zero-extend (char is unsigned) the value and then - * perform a signed 32-bit subtraction. - */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret - -.Lremain8: - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq .Lret0 -.Ltiny8proc: - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq .Ltiny8proc - sub result, data1, data2 - ret - -.Lret0: - mov result, #0 - ret -SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S deleted file mode 100644 index b72913a990389a22be61fc981a730816e9a427b6..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strnlen.S +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2013 ARM Ltd. - * Copyright (C) 2013 Linaro. - * - * This code is based on glibc cortex strings work originally authored by Linaro - * be found @ - * - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ - * files/head:/src/aarch64/ - */ - -#include -#include - -/* - * determine the length of a fixed-size string - * - * Parameters: - * x0 - const string pointer - * x1 - maximal string length - * Returns: - * x0 - the return length of specific string - */ - -/* Arguments and results. */ -srcin .req x0 -len .req x0 -limit .req x1 - -/* Locals and temporaries. */ -src .req x2 -data1 .req x3 -data2 .req x4 -data2a .req x5 -has_nul1 .req x6 -has_nul2 .req x7 -tmp1 .req x8 -tmp2 .req x9 -tmp3 .req x10 -tmp4 .req x11 -zeroones .req x12 -pos .req x13 -limit_wd .req x14 - -#define REP8_01 0x0101010101010101 -#define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 - -SYM_FUNC_START_WEAK_PI(strnlen) - cbz limit, .Lhit_limit - mov zeroones, #REP8_01 - bic src, srcin, #15 - ands tmp1, srcin, #15 - b.ne .Lmisaligned - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ - - /* - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - * can be done in parallel across the entire word. - */ - /* - * The inner loop deals with two Dwords at a time. This has a - * slightly higher start-up cost, but we should win quite quickly, - * especially on cores with a high number of issue slots per - * cycle, as we get much better parallelism out of the operations. - */ -.Lloop: - ldp data1, data2, [src], #16 -.Lrealigned: - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - sub tmp3, data2, zeroones - orr tmp4, data2, #REP8_7f - bic has_nul1, tmp1, tmp2 - bic has_nul2, tmp3, tmp4 - subs limit_wd, limit_wd, #1 - orr tmp1, has_nul1, has_nul2 - ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ - b.eq .Lloop - - cbz tmp1, .Lhit_limit /* No null in final Qword. */ - - /* - * We know there's a null in the final Qword. The easiest thing - * to do now is work out the length of the string and return - * MIN (len, limit). 
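The remainder of strnlen above is exactly that MIN(len, limit). For reference, the whole function's contract in plain C (strnlen_ref is a hypothetical name; the assembly gets the same answer a word or two at a time):

#include <stddef.h>

/* Length of s, capped at limit. */
static size_t strnlen_ref(const char *s, size_t limit)
{
	size_t len;

	for (len = 0; len < limit && s[len] != '\0'; len++)
		;
	return len;
}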
- */ - sub len, src, srcin - cbz has_nul1, .Lnul_in_data2 -CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/ - - sub len, len, #8 - mov has_nul2, has_nul1 -.Lnul_in_data2: - /* - * For big-endian, carry propagation (if the final byte in the - * string is 0x01) means we cannot use has_nul directly. The - * easiest way to get the correct byte is to byte-swap the data - * and calculate the syndrome a second time. - */ -CPU_BE( rev data2, data2 ) -CPU_BE( sub tmp1, data2, zeroones ) -CPU_BE( orr tmp2, data2, #REP8_7f ) -CPU_BE( bic has_nul2, tmp1, tmp2 ) - - sub len, len, #8 - rev has_nul2, has_nul2 - clz pos, has_nul2 - add len, len, pos, lsr #3 /* Bits to bytes. */ - cmp len, limit - csel len, len, limit, ls /* Return the lower value. */ - ret - -.Lmisaligned: - /* - * Deal with a partial first word. - * We're doing two things in parallel here; - * 1) Calculate the number of words (but avoiding overflow if - * limit is near ULONG_MAX) - to do this we need to work out - * limit + tmp1 - 1 as a 65-bit value before shifting it; - * 2) Load and mask the initial data words - we force the bytes - * before the ones we are interested in to 0xff - this ensures - * early bytes will not hit any zero detection. - */ - ldp data1, data2, [src], #16 - - sub limit_wd, limit, #1 - and tmp3, limit_wd, #15 - lsr limit_wd, limit_wd, #4 - - add tmp3, tmp3, tmp1 - add limit_wd, limit_wd, tmp3, lsr #4 - - neg tmp4, tmp1 - lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ - - mov tmp2, #~0 - /* Big-endian. Early bytes are at MSB. */ -CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ - /* Little-endian. Early bytes are at LSB. */ -CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ - - cmp tmp1, #8 - - orr data1, data1, tmp2 - orr data2a, data2, tmp2 - - csinv data1, data1, xzr, le - csel data2, data2, data2a, le - b .Lrealigned - -.Lhit_limit: - mov len, limit - ret -SYM_FUNC_END_PI(strnlen) -EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S deleted file mode 100644 index 13132d1ed6d127913883f3215a3c0819cbb5598e..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/strrchr.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/lib/strrchr.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2013 ARM Ltd. - */ - -#include -#include - -/* - * Find the last occurrence of a character in a string. - * - * Parameters: - * x0 - str - * x1 - c - * Returns: - * x0 - address of last occurrence of 'c' or 0 - */ -SYM_FUNC_START_WEAK_PI(strrchr) - mov x3, #0 - and w1, w1, #0xff -1: ldrb w2, [x0], #1 - cbz w2, 2f - cmp w2, w1 - b.ne 1b - sub x3, x0, #1 - b 1b -2: mov x0, x3 - ret -SYM_FUNC_END_PI(strrchr) -EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S deleted file mode 100644 index a88613834fb07b350031390cb3ecf4595bc877f5..0000000000000000000000000000000000000000 --- a/arch/arm64/lib/tishift.S +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) - * - * Copyright (C) 2017-2018 Jason A. Donenfeld . All Rights Reserved. 
- */ - -#include - -#include - -SYM_FUNC_START(__ashlti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsl x1, x1, x2 - lsr x3, x0, x3 - lsl x2, x0, x2 - orr x1, x1, x3 - mov x0, x2 -1: - ret -2: - neg w1, w3 - mov x2, #0 - lsl x1, x0, x1 - mov x0, x2 - ret -SYM_FUNC_END(__ashlti3) -EXPORT_SYMBOL(__ashlti3) - -SYM_FUNC_START(__ashrti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsr x0, x0, x2 - lsl x3, x1, x3 - asr x2, x1, x2 - orr x0, x0, x3 - mov x1, x2 -1: - ret -2: - neg w0, w3 - asr x2, x1, #63 - asr x0, x1, x0 - mov x1, x2 - ret -SYM_FUNC_END(__ashrti3) -EXPORT_SYMBOL(__ashrti3) - -SYM_FUNC_START(__lshrti3) - cbz x2, 1f - mov x3, #64 - sub x3, x3, x2 - cmp x3, #0 - b.le 2f - lsr x0, x0, x2 - lsl x3, x1, x3 - lsr x2, x1, x2 - orr x0, x0, x3 - mov x1, x2 -1: - ret -2: - neg w0, w3 - mov x2, #0 - lsr x0, x1, x0 - mov x1, x2 - ret -SYM_FUNC_END(__lshrti3) -EXPORT_SYMBOL(__lshrti3) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S deleted file mode 100644 index db767b072601e36fddb8ee7991d801d6f1f2f6d6..0000000000000000000000000000000000000000 --- a/arch/arm64/mm/cache.S +++ /dev/null @@ -1,246 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Cache maintenance - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * flush_icache_range(start,end) - * - * Ensure that the I and D caches are coherent within specified region. - * This is typically used when code has been written to a memory region, - * and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(__flush_icache_range) - /* FALLTHROUGH */ - -/* - * __flush_cache_user_range(start,end) - * - * Ensure that the I and D caches are coherent within specified region. - * This is typically used when code has been written to a memory region, - * and will be executed. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3, x4 -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - b 7f -alternative_else_nop_endif - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x0, x3 -1: -user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE - add x4, x4, x2 - cmp x4, x1 - b.lo 1b - dsb ish - -7: -alternative_if ARM64_HAS_CACHE_DIC - isb - b 8f -alternative_else_nop_endif - invalidate_icache_by_line x0, x1, x2, x3, 9f -8: mov x0, #0 -1: - uaccess_ttbr0_disable x1, x2 - ret -9: - mov x0, #-EFAULT - b 1b -ENDPROC(__flush_icache_range) -ENDPROC(__flush_cache_user_range) - -/* - * invalidate_icache_range(start,end) - * - * Ensure that the I cache is invalid within specified region. - * - * - start - virtual start address of region - * - end - virtual end address of region - */ -ENTRY(invalidate_icache_range) -alternative_if ARM64_HAS_CACHE_DIC - mov x0, xzr - isb - ret -alternative_else_nop_endif - - uaccess_ttbr0_enable x2, x3, x4 - - invalidate_icache_by_line x0, x1, x2, x3, 2f - mov x0, xzr -1: - uaccess_ttbr0_disable x1, x2 - ret -2: - mov x0, #-EFAULT - b 1b -ENDPROC(invalidate_icache_range) - -/* - * __flush_dcache_area(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned and invalidated to the PoC. 
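The __ashlti3/__ashrti3/__lshrti3 helpers a few hunks above implement 128-bit shifts on a pair of 64-bit registers. A C sketch of the logical-left case, using a hypothetical two-word struct; the n >= 64 branch corresponds to the b.le 2f path in the assembly:

#include <stdint.h>

struct u128 {
	uint64_t lo;	/* x0 in the assembly */
	uint64_t hi;	/* x1 in the assembly */
};

static struct u128 ashl_ti(struct u128 v, unsigned int n)
{
	struct u128 r;

	if (n == 0)
		return v;
	if (n < 64) {
		/* Bits carried from lo into hi, as in the main path. */
		r.hi = (v.hi << n) | (v.lo >> (64 - n));
		r.lo = v.lo << n;
	} else {
		/* Shift of 64..127: lo lands in hi, lo becomes zero. */
		r.hi = v.lo << (n - 64);
		r.lo = 0;
	}
	return r;
}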
- * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__flush_dcache_area) - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__flush_dcache_area) - -/* - * __clean_dcache_area_pou(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoU. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_pou) -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - ret -alternative_else_nop_endif - dcache_by_line_op cvau, ish, x0, x1, x2, x3 - ret -ENDPROC(__clean_dcache_area_pou) - -/* - * __inval_dcache_area(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are invalidated. Any partial lines at the ends of the interval are - * also cleaned to PoC to prevent data loss. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__inval_dcache_area) - /* FALLTHROUGH */ - -/* - * __dma_inv_area(start, size) - * - start - virtual start address of region - * - size - size in question - */ -__dma_inv_area: - add x1, x1, x0 - dcache_line_size x2, x3 - sub x3, x2, #1 - tst x1, x3 // end cache line aligned? - bic x1, x1, x3 - b.eq 1f - dc civac, x1 // clean & invalidate D / U line -1: tst x0, x3 // start cache line aligned? - bic x0, x0, x3 - b.eq 2f - dc civac, x0 // clean & invalidate D / U line - b 3f -2: dc ivac, x0 // invalidate D / U line -3: add x0, x0, x2 - cmp x0, x1 - b.lo 2b - dsb sy - ret -ENDPIPROC(__inval_dcache_area) -ENDPROC(__dma_inv_area) - -/* - * __clean_dcache_area_poc(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoC. - * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_poc) - /* FALLTHROUGH */ - -/* - * __dma_clean_area(start, size) - * - start - virtual start address of region - * - size - size in question - */ -__dma_clean_area: - dcache_by_line_op cvac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__clean_dcache_area_poc) -ENDPROC(__dma_clean_area) - -/* - * __clean_dcache_area_pop(kaddr, size) - * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) - * are cleaned to the PoP. 
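__dma_inv_area above is deliberate about partial lines: a cache line that straddles the start or end of the range is cleaned and invalidated (DC CIVAC) rather than just invalidated, so bystander data sharing the line is written back before the line is dropped. The loop's shape in C, under the assumption that dc_ivac, dc_civac, and dsb_sy are hypothetical wrappers around the corresponding instructions:

#include <stddef.h>
#include <stdint.h>

extern void dc_ivac(uintptr_t va);	/* hypothetical: DC IVAC  */
extern void dc_civac(uintptr_t va);	/* hypothetical: DC CIVAC */
extern void dsb_sy(void);		/* hypothetical: DSB SY   */

static void dma_inv_area(uintptr_t start, size_t size, size_t line)
{
	uintptr_t end = start + size;
	uintptr_t mask = line - 1;

	if (end & mask)			/* partial line at the end */
		dc_civac(end & ~mask);
	end &= ~mask;

	if (start & mask) {		/* partial line at the start */
		dc_civac(start & ~mask);
		start = (start & ~mask) + line;
	}
	for (; start < end; start += line)
		dc_ivac(start);		/* whole lines: invalidate only */
	dsb_sy();
}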
- * - * - kaddr - kernel address - * - size - size in question - */ -ENTRY(__clean_dcache_area_pop) - alternative_if_not ARM64_HAS_DCPOP - b __clean_dcache_area_poc - alternative_else_nop_endif - dcache_by_line_op cvap, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__clean_dcache_area_pop) - -/* - * __dma_flush_area(start, size) - * - * clean & invalidate D / U line - * - * - start - virtual start address of region - * - size - size in question - */ -ENTRY(__dma_flush_area) - dcache_by_line_op civac, sy, x0, x1, x2, x3 - ret -ENDPIPROC(__dma_flush_area) - -/* - * __dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(__dma_map_area) - cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_area - b __dma_clean_area -ENDPIPROC(__dma_map_area) - -/* - * __dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(__dma_unmap_area) - cmp w2, #DMA_TO_DEVICE - b.ne __dma_inv_area - ret -ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S deleted file mode 100644 index a1e0592d1fbcd8833e41b2486869d7ae9898981b..0000000000000000000000000000000000000000 --- a/arch/arm64/mm/proc.S +++ /dev/null @@ -1,478 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Based on arch/arm/mm/proc.S - * - * Copyright (C) 2001 Deep Blue Solutions Ltd. - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_ARM64_64K_PAGES -#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K -#elif defined(CONFIG_ARM64_16K_PAGES) -#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K -#else /* CONFIG_ARM64_4K_PAGES */ -#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K -#endif - -#ifdef CONFIG_RANDOMIZE_BASE -#define TCR_KASLR_FLAGS TCR_NFD1 -#else -#define TCR_KASLR_FLAGS 0 -#endif - -#define TCR_SMP_FLAGS TCR_SHARED - -/* PTWs cacheable, inner/outer WBWA */ -#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA - -#ifdef CONFIG_KASAN_SW_TAGS -#define TCR_KASAN_FLAGS TCR_TBI1 -#else -#define TCR_KASAN_FLAGS 0 -#endif - -#define MAIR(attr, mt) ((attr) << ((mt) * 8)) - -#ifdef CONFIG_CPU_PM -/** - * cpu_do_suspend - save CPU registers context - * - * x0: virtual address of context pointer - */ -ENTRY(cpu_do_suspend) - mrs x2, tpidr_el0 - mrs x3, tpidrro_el0 - mrs x4, contextidr_el1 - mrs x5, osdlr_el1 - mrs x6, cpacr_el1 - mrs x7, tcr_el1 - mrs x8, vbar_el1 - mrs x9, mdscr_el1 - mrs x10, oslsr_el1 - mrs x11, sctlr_el1 -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x12, tpidr_el1 -alternative_else - mrs x12, tpidr_el2 -alternative_endif - mrs x13, sp_el0 - stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - stp x12, x13, [x0, #80] - ret -ENDPROC(cpu_do_suspend) - -/** - * cpu_do_resume - restore CPU register context - * - * x0: Address of context pointer - */ - .pushsection ".idmap.text", "awx" -ENTRY(cpu_do_resume) - ldp x2, x3, [x0] - ldp x4, x5, [x0, #16] - ldp x6, x8, [x0, #32] - ldp x9, x10, [x0, #48] - ldp x11, x12, [x0, #64] - ldp x13, x14, [x0, #80] - msr tpidr_el0, x2 - msr tpidrro_el0, x3 - msr contextidr_el1, x4 - msr cpacr_el1, x6 - - /* Don't change t0sz here, mask those bits when restoring */ - mrs x7, tcr_el1 - bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH - - msr tcr_el1, x8 - msr vbar_el1, x9 - - /* - * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking - * 
debug exceptions. By restoring MDSCR_EL1 here, we may take a debug - * exception. Mask them until local_daif_restore() in cpu_suspend() - * resets them. - */ - disable_daif - msr mdscr_el1, x10 - - msr sctlr_el1, x12 -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - msr tpidr_el1, x13 -alternative_else - msr tpidr_el2, x13 -alternative_endif - msr sp_el0, x14 - /* - * Restore oslsr_el1 by writing oslar_el1 - */ - msr osdlr_el1, x5 - ubfx x11, x11, #1, #1 - msr oslar_el1, x11 - reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - -alternative_if ARM64_HAS_RAS_EXTN - msr_s SYS_DISR_EL1, xzr -alternative_else_nop_endif - - isb - ret -ENDPROC(cpu_do_resume) - .popsection -#endif - -/* - * cpu_do_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys. - * - * - pgd_phys - physical address of new TTB - */ -ENTRY(cpu_do_switch_mm) - mrs x2, ttbr1_el1 - mmid x1, x1 // get mm->context.id - phys_to_ttbr x3, x0 - -alternative_if ARM64_HAS_CNP - cbz x1, 1f // skip CNP for reserved ASID - orr x3, x3, #TTBR_CNP_BIT -1: -alternative_else_nop_endif -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - bfi x3, x1, #48, #16 // set the ASID field in TTBR0 -#endif - bfi x2, x1, #48, #16 // set the ASID - msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) - isb - msr ttbr0_el1, x3 // now update TTBR0 - isb - b post_ttbr_update_workaround // Back to C code... -ENDPROC(cpu_do_switch_mm) - - .pushsection ".idmap.text", "awx" - -.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 - adrp \tmp1, empty_zero_page - phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2, \tmp1 - msr ttbr1_el1, \tmp2 - isb - tlbi vmalle1 - dsb nsh - isb -.endm - -/* - * void idmap_cpu_replace_ttbr1(phys_addr_t ttbr1) - * - * This is the low-level counterpart to cpu_replace_ttbr1, and should not be - * called by anything else. It can only be executed from a TTBR0 mapping. - */ -ENTRY(idmap_cpu_replace_ttbr1) - save_and_disable_daif flags=x2 - - __idmap_cpu_set_reserved_ttbr1 x1, x3 - - offset_ttbr1 x0, x3 - msr ttbr1_el1, x0 - isb - - restore_daif x2 - - ret -ENDPROC(idmap_cpu_replace_ttbr1) - .popsection - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - .pushsection ".idmap.text", "awx" - - .macro __idmap_kpti_get_pgtable_ent, type - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries - .endm - - .macro __idmap_kpti_put_pgtable_ent_ng, type - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. - .endm - -/* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) - * - * Called exactly once from stop_machine context by each CPU found during boot. - */ -__idmap_kpti_flag: - .long 1 -ENTRY(idmap_kpti_install_ng_mappings) - cpu .req w0 - num_cpus .req w1 - swapper_pa .req x2 - swapper_ttb .req x3 - flag_ptr .req x4 - cur_pgdp .req x5 - end_pgdp .req x6 - pgd .req x7 - cur_pudp .req x8 - end_pudp .req x9 - pud .req x10 - cur_pmdp .req x11 - end_pmdp .req x12 - pmd .req x13 - cur_ptep .req x14 - end_ptep .req x15 - pte .req x16 - - mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb - adr flag_ptr, __idmap_kpti_flag - - cbnz cpu, __idmap_kpti_secondary - - /* We're the boot CPU. 
Wait for the others to catch up */ - sevl -1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b - - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 - isb - - /* Everybody is enjoying the idmap, so we can rewrite swapper. */ - /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd - - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb - - /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 - isb - - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb - - /* Set the flag to zero to indicate that we're all done */ - str wzr, [flag_ptr] - ret - - /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 - pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd - .endif - - /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 - pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud - .endif - - /* PTE */ -walk_ptes: - pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b next_pmd - - /* Secondary CPUs end up here */ -__idmap_kpti_secondary: - /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 - - /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] - cbnz w17, 1b - - /* Wait for the boot CPU to finish messing around with swapper */ - sevl -1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b - - /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x18 - msr ttbr1_el1, swapper_ttb - isb - ret - - .unreq cpu - .unreq num_cpus - .unreq swapper_pa - .unreq swapper_ttb - .unreq flag_ptr - .unreq cur_pgdp - .unreq end_pgdp - .unreq pgd - .unreq cur_pudp - .unreq end_pudp - .unreq pud - .unreq cur_pmdp - .unreq end_pmdp - .unreq pmd - .unreq cur_ptep - .unreq end_ptep - .unreq pte -ENDPROC(idmap_kpti_install_ng_mappings) - .popsection -#endif - -/* - * __cpu_setup - * - * Initialise the processor for turning the MMU on. Return in x0 the - * value of the SCTLR_EL1 register. 
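The idmap_kpti_install_ng_mappings rendezvous above is a small barrier protocol around the page-table rewrite: each secondary increments a flag and spins until it returns to zero, while the boot CPU waits for the flag to reach the CPU count, rewrites swapper with the MMU off, and finally clears the flag. The same protocol sketched with C11 atomics (the ldaxr/stxr pairs become atomic read-modify-writes; the wfe/sevl event waiting is elided):

#include <stdatomic.h>

static atomic_int kpti_flag = 1;	/* the boot CPU is pre-counted */

void kpti_secondary(void)
{
	atomic_fetch_add(&kpti_flag, 1);	/* report "ready" */
	while (atomic_load(&kpti_flag) != 0)
		;	/* wait for the boot CPU to finish the rewrite */
}

void kpti_boot(int num_cpus)
{
	while (atomic_load(&kpti_flag) != num_cpus)
		;	/* wait until every CPU has checked in */
	/* ... MMU off, walk swapper setting PTE_NG, MMU back on ... */
	atomic_store(&kpti_flag, 0);		/* release the secondaries */
}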
- */ - .pushsection ".idmap.text", "awx" -ENTRY(__cpu_setup) - tlbi vmalle1 // Invalidate local TLB - dsb nsh - - mov x0, #3 << 20 - msr cpacr_el1, x0 // Enable FP/ASIMD - mov x0, #1 << 12 // Reset mdscr_el1 and disable - msr mdscr_el1, x0 // access to the DCC from EL0 - isb // Unmask debug exceptions now, - enable_dbg // since this is per-cpu - reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - /* - * Memory region attributes for LPAE: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * DEVICE_nGnRE 001 00000100 - * DEVICE_GRE 010 00001100 - * NORMAL_NC 011 01000100 - * NORMAL 100 11111111 - * NORMAL_WT 101 10111011 - */ - ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ - MAIR(0x04, MT_DEVICE_nGnRE) | \ - MAIR(0x0c, MT_DEVICE_GRE) | \ - MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) | \ - MAIR(0xbb, MT_NORMAL_WT) - msr mair_el1, x5 - /* - * Prepare SCTLR - */ - mov_q x0, SCTLR_EL1_SET - /* - * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for - * both user and kernel. - */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS - tcr_clear_errata_bits x10, x9, x5 - -#ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x9, vabits_actual - sub x9, xzr, x9 - add x9, x9, #64 - tcr_set_t1sz x10, x9 -#else - ldr_l x9, idmap_t0sz -#endif - tcr_set_t0sz x10, x9 - - /* - * Set the IPS bits in TCR_EL1. - */ - tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 -#ifdef CONFIG_ARM64_HW_AFDBM - /* - * Enable hardware update of the Access Flags bit. - * Hardware dirty bit management is enabled later, - * via capabilities. - */ - mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, #0xf - cbz x9, 1f - orr x10, x10, #TCR_HA // hardware Access flag update -1: -#endif /* CONFIG_ARM64_HW_AFDBM */ - msr tcr_el1, x10 - ret // return to head.S -ENDPROC(__cpu_setup) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S deleted file mode 100644 index c5f05c4a4d00883422ed6e211135302cff3be14f..0000000000000000000000000000000000000000 --- a/arch/arm64/xen/hypercall.S +++ /dev/null @@ -1,112 +0,0 @@ -/****************************************************************************** - * hypercall.S - * - * Xen hypercall wrappers - * - * Stefano Stabellini , Citrix, 2012 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * The Xen hypercall calling convention is very similar to the procedure - * call standard for the ARM 64-bit architecture: the first parameter is - * passed in x0, the second in x1, the third in x2, the fourth in x3 and - * the fifth in x4. - * - * The hypercall number is passed in x16. - * - * The return value is in x0. - * - * The hvc ISS is required to be 0xEA1, that is the Xen specific ARM - * hypercall tag. - * - * Parameter structs passed to hypercalls are laid out according to - * the ARM 64-bit EABI standard. - */ - -#include -#include -#include -#include - - -#define XEN_IMM 0xEA1 - -#define HYPERCALL_SIMPLE(hypercall) \ -ENTRY(HYPERVISOR_##hypercall) \ - mov x16, #__HYPERVISOR_##hypercall; \ - hvc XEN_IMM; \ - ret; \ -ENDPROC(HYPERVISOR_##hypercall) - -#define HYPERCALL0 HYPERCALL_SIMPLE -#define HYPERCALL1 HYPERCALL_SIMPLE -#define HYPERCALL2 HYPERCALL_SIMPLE -#define HYPERCALL3 HYPERCALL_SIMPLE -#define HYPERCALL4 HYPERCALL_SIMPLE -#define HYPERCALL5 HYPERCALL_SIMPLE - - .text - -HYPERCALL2(xen_version); -HYPERCALL3(console_io); -HYPERCALL3(grant_table_op); -HYPERCALL2(sched_op); -HYPERCALL2(event_channel_op); -HYPERCALL2(hvm_op); -HYPERCALL2(memory_op); -HYPERCALL2(physdev_op); -HYPERCALL3(vcpu_op); -HYPERCALL1(tmem_op); -HYPERCALL1(platform_op_raw); -HYPERCALL2(multicall); -HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); - -ENTRY(privcmd_call) - mov x16, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - mov x3, x4 - mov x4, x5 - /* - * Privcmd calls are issued by the userspace. The kernel needs to - * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 - * translations to user memory via AT instructions. Since AT - * instructions are not affected by the PAN bit (ARMv8.1), we only - * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation - * is enabled (it implies that hardware UAO and PAN disabled). - */ - uaccess_ttbr0_enable x6, x7, x8 - hvc XEN_IMM - - /* - * Disable userspace access from kernel once the hyp call completed. - */ - uaccess_ttbr0_disable x6, x7 - ret -ENDPROC(privcmd_call); diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S deleted file mode 100644 index 4332a10aec6c79e137d8d4166328978ee6598366..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/entry.S +++ /dev/null @@ -1,736 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004-2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@virtuallogix.com) -; Updated for 2.6.34: Mark Salter -; - -#include -#include -#include -#include -#include -#include - -; Registers naming -#define DP B14 -#define SP B15 - -#ifndef CONFIG_PREEMPT -#define resume_kernel restore_all -#endif - - .altmacro - - .macro MASK_INT reg - MVC .S2 CSR,reg - CLR .S2 reg,0,0,reg - MVC .S2 reg,CSR - .endm - - .macro UNMASK_INT reg - MVC .S2 CSR,reg - SET .S2 reg,0,0,reg - MVC .S2 reg,CSR - .endm - - .macro GET_THREAD_INFO reg - SHR .S1X SP,THREAD_SHIFT,reg - SHL .S1 reg,THREAD_SHIFT,reg - .endm - - ;; - ;; This defines the normal kernel pt_regs layout. 
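Back in hypercall.S, the calling convention described there reduces to: arguments in x0-x4, hypercall number in x16, hvc with the Xen immediate 0xEA1, result in x0. A two-argument example written as C inline assembly (xen_hypercall2 is a hypothetical wrapper; the real file generates these stubs with the HYPERCALL_SIMPLE macro):

/* Hypothetical C equivalent of HYPERCALL2(name). */
static inline long xen_hypercall2(unsigned long nr, long a0, long a1)
{
	register long x0 asm("x0") = a0;
	register long x1 asm("x1") = a1;
	register unsigned long x16 asm("x16") = nr;

	asm volatile("hvc #0xEA1"	/* Xen-specific hypercall tag */
		     : "+r" (x0)
		     : "r" (x1), "r" (x16)
		     : "memory");
	return x0;			/* return value comes back in x0 */
}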
- ;; - .macro SAVE_ALL __rp __tsr - STW .D2T2 B0,*SP--[2] ; save original B0 - MVKL .S2 current_ksp,B0 - MVKH .S2 current_ksp,B0 - LDW .D2T2 *B0,B1 ; KSP - - NOP 3 - STW .D2T2 B1,*+SP[1] ; save original B1 - XOR .D2 SP,B1,B0 ; (SP ^ KSP) - LDW .D2T2 *+SP[1],B1 ; restore B0/B1 - LDW .D2T2 *++SP[2],B0 - SHR .S2 B0,THREAD_SHIFT,B0 ; 0 if already using kstack - [B0] STDW .D2T2 SP:DP,*--B1[1] ; user: save user sp/dp kstack - [B0] MV .S2 B1,SP ; and switch to kstack -||[!B0] STDW .D2T2 SP:DP,*--SP[1] ; kernel: save on current stack - - SUBAW .D2 SP,2,SP - - ADD .D1X SP,-8,A15 - || STDW .D2T1 A15:A14,*SP--[16] ; save A15:A14 - - STDW .D2T2 B13:B12,*SP--[1] - || STDW .D1T1 A13:A12,*A15--[1] - || MVC .S2 __rp,B13 - - STDW .D2T2 B11:B10,*SP--[1] - || STDW .D1T1 A11:A10,*A15--[1] - || MVC .S2 CSR,B12 - - STDW .D2T2 B9:B8,*SP--[1] - || STDW .D1T1 A9:A8,*A15--[1] - || MVC .S2 RILC,B11 - STDW .D2T2 B7:B6,*SP--[1] - || STDW .D1T1 A7:A6,*A15--[1] - || MVC .S2 ILC,B10 - - STDW .D2T2 B5:B4,*SP--[1] - || STDW .D1T1 A5:A4,*A15--[1] - - STDW .D2T2 B3:B2,*SP--[1] - || STDW .D1T1 A3:A2,*A15--[1] - || MVC .S2 __tsr,B5 - - STDW .D2T2 B1:B0,*SP--[1] - || STDW .D1T1 A1:A0,*A15--[1] - || MV .S1X B5,A5 - - STDW .D2T2 B31:B30,*SP--[1] - || STDW .D1T1 A31:A30,*A15--[1] - STDW .D2T2 B29:B28,*SP--[1] - || STDW .D1T1 A29:A28,*A15--[1] - STDW .D2T2 B27:B26,*SP--[1] - || STDW .D1T1 A27:A26,*A15--[1] - STDW .D2T2 B25:B24,*SP--[1] - || STDW .D1T1 A25:A24,*A15--[1] - STDW .D2T2 B23:B22,*SP--[1] - || STDW .D1T1 A23:A22,*A15--[1] - STDW .D2T2 B21:B20,*SP--[1] - || STDW .D1T1 A21:A20,*A15--[1] - STDW .D2T2 B19:B18,*SP--[1] - || STDW .D1T1 A19:A18,*A15--[1] - STDW .D2T2 B17:B16,*SP--[1] - || STDW .D1T1 A17:A16,*A15--[1] - - STDW .D2T2 B13:B12,*SP--[1] ; save PC and CSR - - STDW .D2T2 B11:B10,*SP--[1] ; save RILC and ILC - STDW .D2T1 A5:A4,*SP--[1] ; save TSR and orig A4 - - ;; We left an unused word on the stack just above pt_regs. - ;; It is used to save whether or not this frame is due to - ;; a syscall. It is cleared here, but the syscall handler - ;; sets it to a non-zero value. 
- MVK .L2 0,B1 - STW .D2T2 B1,*+SP(REGS__END+8) ; clear syscall flag - .endm - - .macro RESTORE_ALL __rp __tsr - LDDW .D2T2 *++SP[1],B9:B8 ; get TSR (B9) - LDDW .D2T2 *++SP[1],B11:B10 ; get RILC (B11) and ILC (B10) - LDDW .D2T2 *++SP[1],B13:B12 ; get PC (B13) and CSR (B12) - - ADDAW .D1X SP,30,A15 - - LDDW .D1T1 *++A15[1],A17:A16 - || LDDW .D2T2 *++SP[1],B17:B16 - LDDW .D1T1 *++A15[1],A19:A18 - || LDDW .D2T2 *++SP[1],B19:B18 - LDDW .D1T1 *++A15[1],A21:A20 - || LDDW .D2T2 *++SP[1],B21:B20 - LDDW .D1T1 *++A15[1],A23:A22 - || LDDW .D2T2 *++SP[1],B23:B22 - LDDW .D1T1 *++A15[1],A25:A24 - || LDDW .D2T2 *++SP[1],B25:B24 - LDDW .D1T1 *++A15[1],A27:A26 - || LDDW .D2T2 *++SP[1],B27:B26 - LDDW .D1T1 *++A15[1],A29:A28 - || LDDW .D2T2 *++SP[1],B29:B28 - LDDW .D1T1 *++A15[1],A31:A30 - || LDDW .D2T2 *++SP[1],B31:B30 - - LDDW .D1T1 *++A15[1],A1:A0 - || LDDW .D2T2 *++SP[1],B1:B0 - - LDDW .D1T1 *++A15[1],A3:A2 - || LDDW .D2T2 *++SP[1],B3:B2 - || MVC .S2 B9,__tsr - LDDW .D1T1 *++A15[1],A5:A4 - || LDDW .D2T2 *++SP[1],B5:B4 - || MVC .S2 B11,RILC - LDDW .D1T1 *++A15[1],A7:A6 - || LDDW .D2T2 *++SP[1],B7:B6 - || MVC .S2 B10,ILC - - LDDW .D1T1 *++A15[1],A9:A8 - || LDDW .D2T2 *++SP[1],B9:B8 - || MVC .S2 B13,__rp - - LDDW .D1T1 *++A15[1],A11:A10 - || LDDW .D2T2 *++SP[1],B11:B10 - || MVC .S2 B12,CSR - - LDDW .D1T1 *++A15[1],A13:A12 - || LDDW .D2T2 *++SP[1],B13:B12 - - MV .D2X A15,SP - || MVKL .S1 current_ksp,A15 - MVKH .S1 current_ksp,A15 - || ADDAW .D1X SP,6,A14 - STW .D1T1 A14,*A15 ; save kernel stack pointer - - LDDW .D2T1 *++SP[1],A15:A14 - - B .S2 __rp ; return from interruption - LDDW .D2T2 *+SP[1],SP:DP - NOP 4 - .endm - - .section .text - - ;; - ;; Jump to schedule() then return to ret_from_exception - ;; -_reschedule: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule,A0 - MVKH .S1 schedule,A0 - B .S2X A0 -#else - B .S1 schedule -#endif - ADDKPC .S2 ret_from_exception,B3,4 - - ;; - ;; Called before syscall handler when process is being debugged - ;; -tracesys_on: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 syscall_trace_entry,A0 - MVKH .S1 syscall_trace_entry,A0 - B .S2X A0 -#else - B .S1 syscall_trace_entry -#endif - ADDKPC .S2 ret_from_syscall_trace,B3,3 - ADD .S1X 8,SP,A4 - -ret_from_syscall_trace: - ;; tracing returns (possibly new) syscall number - MV .D2X A4,B0 - || MVK .S2 __NR_syscalls,B1 - CMPLTU .L2 B0,B1,B1 - - [!B1] BNOP .S2 ret_from_syscall_function,5 - || MVK .S1 -ENOSYS,A4 - - ;; reload syscall args from (possibly modified) stack frame - ;; and get syscall handler addr from sys_call_table: - LDW .D2T2 *+SP(REGS_B4+8),B4 - || MVKL .S2 sys_call_table,B1 - LDW .D2T1 *+SP(REGS_A6+8),A6 - || MVKH .S2 sys_call_table,B1 - LDW .D2T2 *+B1[B0],B0 - || MVKL .S2 ret_from_syscall_function,B3 - LDW .D2T2 *+SP(REGS_B6+8),B6 - || MVKH .S2 ret_from_syscall_function,B3 - LDW .D2T1 *+SP(REGS_A8+8),A8 - LDW .D2T2 *+SP(REGS_B8+8),B8 - NOP - ; B0 = sys_call_table[__NR_*] - BNOP .S2 B0,5 ; branch to syscall handler - || LDW .D2T1 *+SP(REGS_ORIG_A4+8),A4 - -syscall_exit_work: - AND .D1 _TIF_SYSCALL_TRACE,A2,A0 - [!A0] BNOP .S1 work_pending,5 - [A0] B .S2 syscall_trace_exit - ADDKPC .S2 resume_userspace,B3,1 - MVC .S2 CSR,B1 - SET .S2 B1,0,0,B1 - MVC .S2 B1,CSR ; enable ints - -work_pending: - AND .D1 _TIF_NEED_RESCHED,A2,A0 - [!A0] BNOP .S1 work_notifysig,5 - -work_resched: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule,A1 - MVKH .S1 schedule,A1 - B .S2X A1 -#else - B .S2 schedule -#endif - ADDKPC .S2 work_rescheduled,B3,4 -work_rescheduled: - ;; make sure we don't miss an interrupt setting need_resched or - ;; 
sigpending between sampling and the rti - MASK_INT B2 - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_WORK_MASK,A1 - MVK .S1 _TIF_NEED_RESCHED,A3 - NOP 2 - AND .D1 A1,A2,A0 - || AND .S1 A3,A2,A1 - [!A0] BNOP .S1 restore_all,5 - [A1] BNOP .S1 work_resched,5 - -work_notifysig: - ;; enable interrupts for do_notify_resume() - UNMASK_INT B2 - B .S2 do_notify_resume - LDW .D2T1 *+SP(REGS__END+8),A6 ; syscall flag - ADDKPC .S2 resume_userspace,B3,1 - ADD .S1X 8,SP,A4 ; pt_regs pointer is first arg - MV .D2X A2,B4 ; thread_info flags is second arg - - ;; - ;; On C64x+, the return path from exceptions and interrupts - ;; is slightly different - ;; -ENTRY(ret_from_exception) -#ifdef CONFIG_PREEMPT - MASK_INT B2 -#endif - -ENTRY(ret_from_interrupt) - ;; - ;; Check if we are coming from user mode. - ;; - LDW .D2T2 *+SP(REGS_TSR+8),B0 - MVK .S2 0x40,B1 - NOP 3 - AND .D2 B0,B1,B0 - [!B0] BNOP .S2 resume_kernel,5 - -resume_userspace: - ;; make sure we don't miss an interrupt setting need_resched or - ;; sigpending between sampling and the rti - MASK_INT B2 - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_WORK_MASK,A1 - MVK .S1 _TIF_NEED_RESCHED,A3 - NOP 2 - AND .D1 A1,A2,A0 - [A0] BNOP .S1 work_pending,5 - BNOP .S1 restore_all,5 - - ;; - ;; System call handling - ;; B0 = syscall number (in sys_call_table) - ;; A4,B4,A6,B6,A8,B8 = arguments of the syscall function - ;; A4 is the return value register - ;; -system_call_saved: - MVK .L2 1,B2 - STW .D2T2 B2,*+SP(REGS__END+8) ; set syscall flag - MVC .S2 B2,ECR ; ack the software exception - - UNMASK_INT B2 ; re-enable global IT - -system_call_saved_noack: - ;; Check system call number - MVK .S2 __NR_syscalls,B1 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_ni_syscall,A0 -#endif - CMPLTU .L2 B0,B1,B1 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKH .S1 sys_ni_syscall,A0 -#endif - - ;; Check for ptrace - GET_THREAD_INFO A12 - -#ifdef CONFIG_C6X_BIG_KERNEL - [!B1] B .S2X A0 -#else - [!B1] B .S2 sys_ni_syscall -#endif - [!B1] ADDKPC .S2 ret_from_syscall_function,B3,4 - - ;; Get syscall handler addr from sys_call_table - ;; call tracesys_on or call syscall handler - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - || MVKL .S2 sys_call_table,B1 - MVKH .S2 sys_call_table,B1 - LDW .D2T2 *+B1[B0],B0 - NOP 2 - ; A2 = thread_info flags - AND .D1 _TIF_SYSCALL_TRACE,A2,A2 - [A2] BNOP .S1 tracesys_on,5 - ;; B0 = _sys_call_table[__NR_*] - B .S2 B0 - ADDKPC .S2 ret_from_syscall_function,B3,4 - -ret_from_syscall_function: - STW .D2T1 A4,*+SP(REGS_A4+8) ; save return value in A4 - ; original A4 is in orig_A4 -syscall_exit: - ;; make sure we don't miss an interrupt setting need_resched or - ;; sigpending between sampling and the rti - MASK_INT B2 - LDW .D1T1 *+A12(THREAD_INFO_FLAGS),A2 - MVK .S1 _TIF_ALLWORK_MASK,A1 - NOP 3 - AND .D1 A1,A2,A2 ; check for work to do - [A2] BNOP .S1 syscall_exit_work,5 - -restore_all: - RESTORE_ALL NRP,NTSR - - ;; - ;; After a fork we jump here directly from resume, - ;; so that A4 contains the previous task structure.
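The range check and table dispatch that system_call_saved_noack performs have a compact C shape. A sketch with illustrative names (the pt_regs field names and the six-argument prototype are assumptions for illustration, not the kernel's exact declarations):

/* Sketch of the dispatch done above; the tracing flag is checked first. */
long dispatch_syscall(unsigned int nr, struct pt_regs *regs)
{
	long (*handler)(long, long, long, long, long, long);

	if (nr >= __NR_syscalls)        /* the CMPLTU B0,B1 test */
		return sys_ni_syscall();

	handler = sys_call_table[nr];   /* the LDW *+B1[B0] load */
	/* arguments arrive in A4,B4,A6,B6,A8,B8; A4 carries the result */
	return handler(regs->a4, regs->b4, regs->a6,
		       regs->b6, regs->a8, regs->b8);
}

The fork path below is the same exit sequence with one twist: schedule_tail() runs first, then a zero is stored into the frame's A4 slot so the child observes a 0 return value.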
- ;; -ENTRY(ret_from_fork) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule_tail,A0 - MVKH .S1 schedule_tail,A0 - B .S2X A0 -#else - B .S2 schedule_tail -#endif - ADDKPC .S2 ret_from_fork_2,B3,4 -ret_from_fork_2: - ;; return 0 in A4 for child process - GET_THREAD_INFO A12 - BNOP .S2 syscall_exit,3 - MVK .L2 0,B0 - STW .D2T2 B0,*+SP(REGS_A4+8) -ENDPROC(ret_from_fork) - -ENTRY(ret_from_kernel_thread) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 schedule_tail,A0 - MVKH .S1 schedule_tail,A0 - B .S2X A0 -#else - B .S2 schedule_tail -#endif - LDW .D2T2 *+SP(REGS_A0+8),B10 /* get fn */ - ADDKPC .S2 0f,B3,3 -0: - B .S2 B10 /* call fn */ - LDW .D2T1 *+SP(REGS_A1+8),A4 /* get arg */ - ADDKPC .S2 ret_from_fork_2,B3,3 -ENDPROC(ret_from_kernel_thread) - - ;; - ;; These are the interrupt handlers, responsible for calling c6x_do_IRQ() - ;; - .macro SAVE_ALL_INT - SAVE_ALL IRP,ITSR - .endm - - .macro CALL_INT int -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 c6x_do_IRQ,A0 - MVKH .S1 c6x_do_IRQ,A0 - BNOP .S2X A0,1 - MVK .S1 int,A4 - ADDAW .D2 SP,2,B4 - MVKL .S2 ret_from_interrupt,B3 - MVKH .S2 ret_from_interrupt,B3 -#else - CALLP .S2 c6x_do_IRQ,B3 - || MVK .S1 int,A4 - || ADDAW .D2 SP,2,B4 - B .S1 ret_from_interrupt - NOP 5 -#endif - .endm - -ENTRY(_int4_handler) - SAVE_ALL_INT - CALL_INT 4 -ENDPROC(_int4_handler) - -ENTRY(_int5_handler) - SAVE_ALL_INT - CALL_INT 5 -ENDPROC(_int5_handler) - -ENTRY(_int6_handler) - SAVE_ALL_INT - CALL_INT 6 -ENDPROC(_int6_handler) - -ENTRY(_int7_handler) - SAVE_ALL_INT - CALL_INT 7 -ENDPROC(_int7_handler) - -ENTRY(_int8_handler) - SAVE_ALL_INT - CALL_INT 8 -ENDPROC(_int8_handler) - -ENTRY(_int9_handler) - SAVE_ALL_INT - CALL_INT 9 -ENDPROC(_int9_handler) - -ENTRY(_int10_handler) - SAVE_ALL_INT - CALL_INT 10 -ENDPROC(_int10_handler) - -ENTRY(_int11_handler) - SAVE_ALL_INT - CALL_INT 11 -ENDPROC(_int11_handler) - -ENTRY(_int12_handler) - SAVE_ALL_INT - CALL_INT 12 -ENDPROC(_int12_handler) - -ENTRY(_int13_handler) - SAVE_ALL_INT - CALL_INT 13 -ENDPROC(_int13_handler) - -ENTRY(_int14_handler) - SAVE_ALL_INT - CALL_INT 14 -ENDPROC(_int14_handler) - -ENTRY(_int15_handler) - SAVE_ALL_INT - CALL_INT 15 -ENDPROC(_int15_handler) - - ;; - ;; Handler for uninitialized and spurious interrupts - ;; -ENTRY(_bad_interrupt) - B .S2 IRP - NOP 5 -ENDPROC(_bad_interrupt) - - ;; - ;; Entry for NMI/exceptions/syscall - ;; -ENTRY(_nmi_handler) - SAVE_ALL NRP,NTSR - - MVC .S2 EFR,B2 - CMPEQ .L2 1,B2,B2 - || MVC .S2 TSR,B1 - CLR .S2 B1,10,10,B1 - MVC .S2 B1,TSR -#ifdef CONFIG_C6X_BIG_KERNEL - [!B2] MVKL .S1 process_exception,A0 - [!B2] MVKH .S1 process_exception,A0 - [!B2] B .S2X A0 -#else - [!B2] B .S2 process_exception -#endif - [B2] B .S2 system_call_saved - [!B2] ADDAW .D2 SP,2,B1 - [!B2] MV .D1X B1,A4 - ADDKPC .S2 ret_from_trap,B3,2 - -ret_from_trap: - MV .D2X A4,B0 - [!B0] BNOP .S2 ret_from_exception,5 - -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S2 system_call_saved_noack,B3 - MVKH .S2 system_call_saved_noack,B3 -#endif - LDW .D2T2 *+SP(REGS_B0+8),B0 - LDW .D2T1 *+SP(REGS_A4+8),A4 - LDW .D2T2 *+SP(REGS_B4+8),B4 - LDW .D2T1 *+SP(REGS_A6+8),A6 - LDW .D2T2 *+SP(REGS_B6+8),B6 - LDW .D2T1 *+SP(REGS_A8+8),A8 -#ifdef CONFIG_C6X_BIG_KERNEL - || B .S2 B3 -#else - || B .S2 system_call_saved_noack -#endif - LDW .D2T2 *+SP(REGS_B8+8),B8 - NOP 4 -ENDPROC(_nmi_handler) - - ;; - ;; Jump to schedule() then return to ret_from_isr - ;; -#ifdef CONFIG_PREEMPT -resume_kernel: - GET_THREAD_INFO A12 - LDW .D1T1 *+A12(THREAD_INFO_PREEMPT_COUNT),A1 - NOP 4 - [A1] BNOP .S2 restore_all,5 - -preempt_schedule: - GET_THREAD_INFO A2 
- LDW .D1T1 *+A2(THREAD_INFO_FLAGS),A1 -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S2 preempt_schedule_irq,B0 - MVKH .S2 preempt_schedule_irq,B0 - NOP 2 -#else - NOP 4 -#endif - AND .D1 _TIF_NEED_RESCHED,A1,A1 - [!A1] BNOP .S2 restore_all,5 -#ifdef CONFIG_C6X_BIG_KERNEL - B .S2 B0 -#else - B .S2 preempt_schedule_irq -#endif - ADDKPC .S2 preempt_schedule,B3,4 -#endif /* CONFIG_PREEMPT */ - -ENTRY(enable_exception) - DINT - MVC .S2 TSR,B0 - MVC .S2 B3,NRP - MVK .L2 0xc,B1 - OR .D2 B0,B1,B0 - MVC .S2 B0,TSR ; Set GEE and XEN in TSR - B .S2 NRP - NOP 5 -ENDPROC(enable_exception) - - ;; - ;; Special system calls - ;; return address is in B3 - ;; -ENTRY(sys_rt_sigreturn) - ADD .D1X SP,8,A4 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 do_rt_sigreturn,A0 - MVKH .S1 do_rt_sigreturn,A0 - BNOP .S2X A0,5 -#else - || B .S2 do_rt_sigreturn - NOP 5 -#endif -ENDPROC(sys_rt_sigreturn) - -ENTRY(sys_pread_c6x) - MV .D2X A8,B7 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_pread64,A0 - MVKH .S1 sys_pread64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_pread64 - NOP 5 -#endif -ENDPROC(sys_pread_c6x) - -ENTRY(sys_pwrite_c6x) - MV .D2X A8,B7 -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_pwrite64,A0 - MVKH .S1 sys_pwrite64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_pwrite64 - NOP 5 -#endif -ENDPROC(sys_pwrite_c6x) - -;; On Entry -;; A4 - path -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -ENTRY(sys_truncate64_c6x) -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .S2 B4,B5 - MV .D2X A6,B4 -#else - MV .D2X A6,B5 -#endif -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_truncate64,A0 - MVKH .S1 sys_truncate64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_truncate64 - NOP 5 -#endif -ENDPROC(sys_truncate64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -ENTRY(sys_ftruncate64_c6x) -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .S2 B4,B5 - MV .D2X A6,B4 -#else - MV .D2X A6,B5 -#endif -#ifdef CONFIG_C6X_BIG_KERNEL - || MVKL .S1 sys_ftruncate64,A0 - MVKH .S1 sys_ftruncate64,A0 - BNOP .S2X A0,5 -#else - || B .S2 sys_ftruncate64 - NOP 5 -#endif -ENDPROC(sys_ftruncate64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - offset_lo (LE), offset_hi (BE) -;; A6 - offset_lo (BE), offset_hi (LE) -;; B6 - len_lo (LE), len_hi (BE) -;; A8 - len_lo (BE), len_hi (LE) -;; B8 - advice -ENTRY(sys_fadvise64_64_c6x) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 sys_fadvise64_64,A0 - MVKH .S1 sys_fadvise64_64,A0 - BNOP .S2X A0,2 -#else - B .S2 sys_fadvise64_64 - NOP 2 -#endif -#ifdef CONFIG_CPU_BIG_ENDIAN - MV .L2 B4,B5 - || MV .D2X A6,B4 - MV .L1 A8,A6 - || MV .D1X B6,A7 -#else - MV .D2X A6,B5 - MV .L1 A8,A7 - || MV .D1X B6,A6 -#endif - MV .L2 B8,B6 -ENDPROC(sys_fadvise64_64_c6x) - -;; On Entry -;; A4 - fd -;; B4 - mode -;; A6 - offset_hi -;; B6 - offset_lo -;; A8 - len_hi -;; B8 - len_lo -ENTRY(sys_fallocate_c6x) -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 sys_fallocate,A0 - MVKH .S1 sys_fallocate,A0 - BNOP .S2X A0,1 -#else - B .S2 sys_fallocate - NOP -#endif - MV .D1 A6,A7 - MV .D1X B6,A6 - MV .D2X A8,B7 - MV .D2 B8,B6 -ENDPROC(sys_fallocate_c6x) - - ;; put this in .neardata for faster access when using DSBT mode - .section .neardata,"aw",@progbits - .global current_ksp - .hidden current_ksp -current_ksp: - .word init_thread_union + THREAD_START_SP diff --git a/arch/c6x/kernel/head.S b/arch/c6x/kernel/head.S deleted file mode 100644 index fecbeef827bc186491a9fba7ad91927128c84f13..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/head.S +++ /dev/null @@ -1,81 +0,0 @@ -; 
SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -#include -#include -#include - - __HEAD -ENTRY(_c_int00) - ;; Save magic and pointer - MV .S1 A4,A10 - MV .S2 B4,B10 - MVKL .S2 __bss_start,B5 - MVKH .S2 __bss_start,B5 - MVKL .S2 __bss_stop,B6 - MVKH .S2 __bss_stop,B6 - SUB .L2 B6,B5,B6 ; bss size - - ;; Set the stack pointer - MVKL .S2 current_ksp,B0 - MVKH .S2 current_ksp,B0 - LDW .D2T2 *B0,B15 - - ;; clear bss - SHR .S2 B6,3,B0 ; number of dwords to clear - ZERO .L2 B13 - ZERO .L2 B12 -bss_loop: - BDEC .S2 bss_loop,B0 - NOP 3 - CMPLT .L2 B0,0,B1 - [!B1] STDW .D2T2 B13:B12,*B5++[1] - - NOP 4 - AND .D2 ~7,B15,B15 - - ;; Clear GIE and PGIE - MVC .S2 CSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,CSR - MVC .S2 TSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,TSR - MVC .S2 ITSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,ITSR - MVC .S2 NTSR,B2 - CLR .S2 B2,0,1,B2 - MVC .S2 B2,NTSR - - ;; pass DTB pointer to machine_init (or zero if none) - MVKL .S1 OF_DT_HEADER,A0 - MVKH .S1 OF_DT_HEADER,A0 - CMPEQ .L1 A10,A0,A0 - [A0] MV .S1X B10,A4 - [!A0] MVK .S1 0,A4 - -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 machine_init,A0 - MVKH .S1 machine_init,A0 - B .S2X A0 - ADDKPC .S2 0f,B3,4 -0: -#else - CALLP .S2 machine_init,B3 -#endif - - ;; Jump to Linux init -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 start_kernel,A0 - MVKH .S1 start_kernel,A0 - B .S2X A0 -#else - B .S2 start_kernel -#endif - NOP 5 -L1: BNOP .S2 L1,5 diff --git a/arch/c6x/kernel/switch_to.S b/arch/c6x/kernel/switch_to.S deleted file mode 100644 index b7f9f607042e68a5c6ffeec1168e666aa5bac00e..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/switch_to.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2011 Texas Instruments Incorporated - * Author: Mark Salter (msalter@redhat.com) - */ - -#include -#include - -#define SP B15 - - /* - * void __switch_to(struct thread_info *prev, - * struct thread_info *next, - * struct task_struct *tsk) ; - */ -ENTRY(__switch_to) - LDDW .D2T2 *+B4(THREAD_B15_14),B7:B6 - || MV .L2X A4,B5 ; prev - || MV .L1X B4,A5 ; next - || MVC .S2 RILC,B1 - - STW .D2T2 B3,*+B5(THREAD_PC) - || STDW .D1T1 A13:A12,*+A4(THREAD_A13_12) - || MVC .S2 ILC,B0 - - LDW .D2T2 *+B4(THREAD_PC),B3 - || LDDW .D1T1 *+A5(THREAD_A13_12),A13:A12 - - STDW .D1T1 A11:A10,*+A4(THREAD_A11_10) - || STDW .D2T2 B1:B0,*+B5(THREAD_RICL_ICL) -#ifndef __DSBT__ - || MVKL .S2 current_ksp,B1 -#endif - - STDW .D2T2 B15:B14,*+B5(THREAD_B15_14) - || STDW .D1T1 A15:A14,*+A4(THREAD_A15_14) -#ifndef __DSBT__ - || MVKH .S2 current_ksp,B1 -#endif - - ;; Switch to next SP - MV .S2 B7,SP -#ifdef __DSBT__ - || STW .D2T2 B7,*+B14(current_ksp) -#else - || STW .D2T2 B7,*B1 - || MV .L2 B6,B14 -#endif - || LDDW .D1T1 *+A5(THREAD_RICL_ICL),A1:A0 - - STDW .D2T2 B11:B10,*+B5(THREAD_B11_10) - || LDDW .D1T1 *+A5(THREAD_A15_14),A15:A14 - - STDW .D2T2 B13:B12,*+B5(THREAD_B13_12) - || LDDW .D1T1 *+A5(THREAD_A11_10),A11:A10 - - B .S2 B3 ; return in next E1 - || LDDW .D2T2 *+B4(THREAD_B13_12),B13:B12 - - LDDW .D2T2 *+B4(THREAD_B11_10),B11:B10 - NOP - - MV .L2X A0,B0 - || MV .S1 A6,A4 - - MVC .S2 B0,ILC - || MV .L2X A1,B1 - - MVC .S2 B1,RILC -ENDPROC(__switch_to) diff --git a/arch/c6x/kernel/vectors.S b/arch/c6x/kernel/vectors.S deleted file mode 100644 index ad3dc006a6d3a952c866e90edf38a5a00efd074f..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/vectors.S 
+++ /dev/null @@ -1,78 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2004, 2006, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -; This section handles all the interrupt vector routines. -; At RESET the processor sets up the DRAM timing parameters and -; branches to the label _c_int00 which handles initialization for the C code. -; - -#define ALIGNMENT 5 - - .macro IRQVEC name, handler - .align ALIGNMENT - .hidden \name - .global \name -\name: -#ifdef CONFIG_C6X_BIG_KERNEL - STW .D2T1 A0,*B15--[2] - || MVKL .S1 \handler,A0 - MVKH .S1 \handler,A0 - B .S2X A0 - LDW .D2T1 *++B15[2],A0 - NOP 4 - NOP - NOP - .endm -#else /* CONFIG_C6X_BIG_KERNEL */ - B .S2 \handler - NOP - NOP - NOP - NOP - NOP - NOP - NOP - .endm -#endif /* CONFIG_C6X_BIG_KERNEL */ - - .sect ".vectors","ax" - .align ALIGNMENT - .global RESET - .hidden RESET -RESET: -#ifdef CONFIG_C6X_BIG_KERNEL - MVKL .S1 _c_int00,A0 ; branch to _c_int00 - MVKH .S1 _c_int00,A0 - B .S2X A0 -#else - B .S2 _c_int00 - NOP - NOP -#endif - NOP - NOP - NOP - NOP - NOP - - - IRQVEC NMI,_nmi_handler ; NMI interrupt - IRQVEC AINT,_bad_interrupt ; reserved - IRQVEC MSGINT,_bad_interrupt ; reserved - - IRQVEC INT4,_int4_handler - IRQVEC INT5,_int5_handler - IRQVEC INT6,_int6_handler - IRQVEC INT7,_int7_handler - IRQVEC INT8,_int8_handler - IRQVEC INT9,_int9_handler - IRQVEC INT10,_int10_handler - IRQVEC INT11,_int11_handler - IRQVEC INT12,_int12_handler - IRQVEC INT13,_int13_handler - IRQVEC INT14,_int14_handler - IRQVEC INT15,_int15_handler diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S deleted file mode 100644 index 584bab2bace6e22d59619434f86f95ee41a04e62..0000000000000000000000000000000000000000 --- a/arch/c6x/kernel/vmlinux.lds.S +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ld script for the c6x kernel - * - * Copyright (C) 2010, 2011 Texas Instruments Incorporated - * Mark Salter - */ -#include -#include -#include - -ENTRY(_c_int00) - -#if defined(CONFIG_CPU_BIG_ENDIAN) -jiffies = jiffies_64 + 4; -#else -jiffies = jiffies_64; -#endif - -#define READONLY_SEGMENT_START \ - . = PAGE_OFFSET; -#define READWRITE_SEGMENT_START \ - . = ALIGN(128); \ - _data_lma = .; - -SECTIONS -{ - /* - * Start kernel read only segment - */ - READONLY_SEGMENT_START - - .vectors : - { - _vectors_start = .; - *(.vectors) - . = ALIGN(0x400); - _vectors_end = .; - } - - /* - * This section contains data which may be shared with other - * cores. It needs to be a fixed offset from PAGE_OFFSET - * regardless of kernel configuration. - */ - .virtio_ipc_dev : - { - *(.virtio_ipc_dev) - } - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init : - { - _sinittext = .; - HEAD_TEXT - INIT_TEXT - _einittext = .; - } - - INIT_DATA_SECTION(16) - - PERCPU_SECTION(128) - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : - { - _text = .; - _stext = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } - - EXCEPTION_TABLE(16) - NOTES - - RO_DATA_SECTION(PAGE_SIZE) - .const : - { - *(.const .const.* .gnu.linkonce.r.*) - *(.switch) - } - - _etext = .; - - /* - * Start kernel read-write segment. 
- */ - READWRITE_SEGMENT_START - _sdata = .; - - .fardata : AT(ADDR(.fardata) - LOAD_OFFSET) - { - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - PAGE_ALIGNED_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(128) - READ_MOSTLY_DATA(128) - DATA_DATA - CONSTRUCTORS - *(.data1) - *(.fardata .fardata.*) - *(.data.debug_bpt) - } - - .neardata ALIGN(8) : AT(ADDR(.neardata) - LOAD_OFFSET) - { - *(.neardata2 .neardata2.* .gnu.linkonce.s2.*) - *(.neardata .neardata.* .gnu.linkonce.s.*) - . = ALIGN(8); - } - - BUG_TABLE - - _edata = .; - - __bss_start = .; - SBSS(8) - BSS(8) - .far : - { - . = ALIGN(8); - *(.dynfar) - *(.far .far.* .gnu.linkonce.b.*) - . = ALIGN(8); - } - __bss_stop = .; - - _end = .; - - DWARF_DEBUG - - /DISCARD/ : - { - EXIT_TEXT - EXIT_DATA - EXIT_CALL - *(.discard) - *(.discard.*) - *(.interp) - } -} diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S deleted file mode 100644 index 8e625a30fd435a676ec27804ce2618209fd77829..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/csum_64plus.S +++ /dev/null @@ -1,416 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; -; linux/arch/c6x/lib/csum_64plus.s -; -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; -#include - -; -;unsigned int csum_partial_copy(const char *src, char * dst, -; int len, int sum) -; -; A4: src -; B4: dst -; A6: len -; B6: sum -; return csum in A4 -; - - .text -ENTRY(csum_partial_copy) - MVC .S2 ILC,B30 - - MV .D1X B6,A31 ; given csum - ZERO .D1 A9 ; csum (a side) -|| ZERO .D2 B9 ; csum (b side) -|| SHRU .S2X A6,2,B5 ; len / 4 - - ;; Check alignment and size - AND .S1 3,A4,A1 -|| AND .S2 3,B4,B0 - OR .L2X B0,A1,B0 ; non aligned condition -|| MVC .S2 B5,ILC -|| MVK .D2 1,B2 -|| MV .D1X B5,A1 ; words condition - [!A1] B .S1 L8 - [B0] BNOP .S1 L6,5 - - SPLOOP 1 - - ;; Main loop for aligned words - LDW .D1T1 *A4++,A7 - NOP 4 - MV .S2X A7,B7 -|| EXTU .S1 A7,0,16,A16 - STW .D2T2 B7,*B4++ -|| MPYU .M2 B7,B2,B8 -|| ADD .L1 A16,A9,A9 - NOP - SPKERNEL 8,0 -|| ADD .L2 B8,B9,B9 - - ZERO .D1 A1 -|| ADD .L1X A9,B9,A9 ; add csum from a and b sides - -L6: - [!A1] BNOP .S1 L8,5 - - ;; Main loop for non-aligned words - SPLOOP 2 - || MVK .L1 1,A2 - - LDNW .D1T1 *A4++,A7 - NOP 3 - - NOP - MV .S2X A7,B7 - || EXTU .S1 A7,0,16,A16 - || MPYU .M1 A7,A2,A8 - - ADD .L1 A16,A9,A9 - SPKERNEL 6,0 - || STNW .D2T2 B7,*B4++ - || ADD .L1 A8,A9,A9 - -L8: AND .S2X 2,A6,B5 - CMPGT .L2 B5,0,B0 - [!B0] BNOP .S1 L82,4 - - ;; Manage half-word - ZERO .L1 A7 -|| ZERO .D1 A8 - -#ifdef CONFIG_CPU_BIG_ENDIAN - - LDBU .D1T1 *A4++,A7 - LDBU .D1T1 *A4++,A8 - NOP 3 - SHL .S1 A7,8,A0 - ADD .S1 A8,A9,A9 - STB .D2T1 A7,*B4++ -|| ADD .S1 A0,A9,A9 - STB .D2T1 A8,*B4++ - -#else - - LDBU .D1T1 *A4++,A7 - LDBU .D1T1 *A4++,A8 - NOP 3 - ADD .S1 A7,A9,A9 - SHL .S1 A8,8,A0 - - STB .D2T1 A7,*B4++ -|| ADD .S1 A0,A9,A9 - STB .D2T1 A8,*B4++ - -#endif - - ;; Manage eventually the last byte -L82: AND .S2X 1,A6,B0 - [!B0] BNOP .S1 L9,5 - -|| ZERO .L1 A7 - -L83: LDBU .D1T1 *A4++,A7 - NOP 4 - - MV .L2X A7,B7 - -#ifdef CONFIG_CPU_BIG_ENDIAN - - STB .D2T2 B7,*B4++ -|| SHL .S1 A7,8,A7 - ADD .S1 A7,A9,A9 - -#else - - STB .D2T2 B7,*B4++ -|| ADD .S1 A7,A9,A9 - -#endif - - ;; Fold the csum -L9: SHRU .S2X A9,16,B0 - [!B0] BNOP .S1 L10,5 - -L91: SHRU .S2X A9,16,B4 -|| EXTU .S1 A9,16,16,A3 - ADD .D1X A3,B4,A9 - - SHRU .S1 A9,16,A0 - [A0] BNOP .S1 L91,5 - -L10: ADD .D1 A31,A9,A9 - MV .D1 A9,A4 - - BNOP .S2 B3,4 - MVC .S2 B30,ILC 
-ENDPROC(csum_partial_copy) - -; -;unsigned short -;ip_fast_csum(unsigned char *iph, unsigned int ihl) -;{ -; unsigned int checksum = 0; -; unsigned short *tosum = (unsigned short *) iph; -; int len; -; -; len = ihl*4; -; -; if (len <= 0) -; return 0; -; -; while(len) { -; len -= 2; -; checksum += *tosum++; -; } -; if (len & 1) -; checksum += *(unsigned char*) tosum; -; -; while(checksum >> 16) -; checksum = (checksum & 0xffff) + (checksum >> 16); -; -; return ~checksum; -;} -; -; A4: iph -; B4: ihl -; return checksum in A4 -; - .text - -ENTRY(ip_fast_csum) - ZERO .D1 A5 - || MVC .S2 ILC,B30 - SHL .S2 B4,2,B0 - CMPGT .L2 B0,0,B1 - [!B1] BNOP .S1 L15,4 - [!B1] ZERO .D1 A3 - - [!B0] B .S1 L12 - SHRU .S2 B0,1,B0 - MVC .S2 B0,ILC - NOP 3 - - SPLOOP 1 - LDHU .D1T1 *A4++,A3 - NOP 3 - NOP - SPKERNEL 5,0 - || ADD .L1 A3,A5,A5 - -L12: SHRU .S1 A5,16,A0 - [!A0] BNOP .S1 L14,5 - -L13: SHRU .S2X A5,16,B4 - EXTU .S1 A5,16,16,A3 - ADD .D1X A3,B4,A5 - SHRU .S1 A5,16,A0 - [A0] BNOP .S1 L13,5 - -L14: NOT .D1 A5,A3 - EXTU .S1 A3,16,16,A3 - -L15: BNOP .S2 B3,3 - MVC .S2 B30,ILC - MV .D1 A3,A4 -ENDPROC(ip_fast_csum) - -; -;unsigned short -;do_csum(unsigned char *buff, unsigned int len) -;{ -; int odd, count; -; unsigned int result = 0; -; -; if (len <= 0) -; goto out; -; odd = 1 & (unsigned long) buff; -; if (odd) { -;#ifdef __LITTLE_ENDIAN -; result += (*buff << 8); -;#else -; result = *buff; -;#endif -; len--; -; buff++; -; } -; count = len >> 1; /* nr of 16-bit words.. */ -; if (count) { -; if (2 & (unsigned long) buff) { -; result += *(unsigned short *) buff; -; count--; -; len -= 2; -; buff += 2; -; } -; count >>= 1; /* nr of 32-bit words.. */ -; if (count) { -; unsigned int carry = 0; -; do { -; unsigned int w = *(unsigned int *) buff; -; count--; -; buff += 4; -; result += carry; -; result += w; -; carry = (w > result); -; } while (count); -; result += carry; -; result = (result & 0xffff) + (result >> 16); -; } -; if (len & 2) { -; result += *(unsigned short *) buff; -; buff += 2; -; } -; } -; if (len & 1) -;#ifdef __LITTLE_ENDIAN -; result += *buff; -;#else -; result += (*buff << 8); -;#endif -; result = (result & 0xffff) + (result >> 16); -; /* add up carry.. 
*/ - result = (result & 0xffff) + (result >> 16); - if (odd) - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); -out: - return result; -} -; -; A4: buff -; B4: len -; return checksum in A4 -; - -ENTRY(do_csum) - CMPGT .L2 B4,0,B0 - [!B0] BNOP .S1 L26,3 - EXTU .S1 A4,31,31,A0 - - MV .L1 A0,A3 -|| MV .S1X B3,A5 -|| MV .L2 B4,B3 -|| ZERO .D1 A1 - -#ifdef CONFIG_CPU_BIG_ENDIAN - [A0] SUB .L2 B3,1,B3 -|| [A0] LDBU .D1T1 *A4++,A1 -#else - [!A0] BNOP .S1 L21,5 -|| [A0] LDBU .D1T1 *A4++,A0 - SUB .L2 B3,1,B3 -|| SHL .S1 A0,8,A1 -L21: -#endif - SHR .S2 B3,1,B0 - [!B0] BNOP .S1 L24,3 - MVK .L1 2,A0 - AND .L1 A4,A0,A0 - - [!A0] BNOP .S1 L22,5 -|| [A0] LDHU .D1T1 *A4++,A0 - SUB .L2 B0,1,B0 -|| SUB .S2 B3,2,B3 -|| ADD .L1 A0,A1,A1 -L22: - SHR .S2 B0,1,B0 -|| ZERO .L1 A0 - - [!B0] BNOP .S1 L23,5 -|| [B0] MVC .S2 B0,ILC - - SPLOOP 3 - SPMASK L1 -|| MV .L1 A1,A2 -|| LDW .D1T1 *A4++,A1 - - NOP 4 - ADD .L1 A0,A1,A0 - ADD .L1 A2,A0,A2 - - SPKERNEL 1,2 -|| CMPGTU .L1 A1,A2,A0 - - ADD .L1 A0,A2,A6 - EXTU .S1 A6,16,16,A7 - SHRU .S2X A6,16,B0 - NOP 1 - ADD .L1X A7,B0,A1 -L23: - MVK .L2 2,B0 - AND .L2 B3,B0,B0 - [B0] LDHU .D1T1 *A4++,A0 - NOP 4 - [B0] ADD .L1 A0,A1,A1 -L24: - EXTU .S2 B3,31,31,B0 -#ifdef CONFIG_CPU_BIG_ENDIAN - [!B0] BNOP .S1 L25,4 -|| [B0] LDBU .D1T1 *A4,A0 - SHL .S1 A0,8,A0 - ADD .L1 A0,A1,A1 -L25: -#else - [B0] LDBU .D1T1 *A4,A0 - NOP 4 - [B0] ADD .L1 A0,A1,A1 -#endif - EXTU .S1 A1,16,16,A0 - SHRU .S2X A1,16,B0 - NOP 1 - ADD .L1X A0,B0,A0 - SHRU .S1 A0,16,A1 - ADD .L1 A0,A1,A0 - EXTU .S1 A0,16,16,A1 - EXTU .S1 A1,16,24,A2 - - EXTU .S1 A1,24,16,A0 -|| MV .L2X A3,B0 - - [B0] OR .L1 A0,A2,A1 -L26: - NOP 1 - BNOP .S2X A5,4 - MV .L1 A1,A4 -ENDPROC(do_csum) - -;__wsum csum_partial(const void *buff, int len, __wsum wsum) -;{ -; unsigned int sum = (__force unsigned int)wsum; -; unsigned int result = do_csum(buff, len); -; -; /* add in old sum, and carry.. */ -; result += sum; -; if (sum > result) -; result += 1; -; return (__force __wsum)result; -;} -; -ENTRY(csum_partial) - MV .L1X B3,A9 -|| CALLP .S2 do_csum,B3 -|| MV .S1 A6,A8 - BNOP .S2X A9,2 - ADD .L1 A8,A4,A1 - CMPGTU .L1 A8,A1,A0 - ADD .L1 A1,A0,A4 -ENDPROC(csum_partial) - -;unsigned short -;ip_compute_csum(unsigned char *buff, unsigned int len) -; -; A4: buff -; B4: len -; return checksum in A4 - -ENTRY(ip_compute_csum) - MV .L1X B3,A9 -|| CALLP .S2 do_csum,B3 - BNOP .S2X A9,3 - NOT .S1 A4,A4 - CLR .S1 A4,16,31,A4 -ENDPROC(ip_compute_csum) diff --git a/arch/c6x/lib/divi.S b/arch/c6x/lib/divi.S deleted file mode 100644 index d1764ae0b519e027502d70d12f0180ff4b9aed0e..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divi.S +++ /dev/null @@ -1,41 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack.
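As the ABI notes above say, the signed divide is a thin wrapper that fixes up signs around the unsigned routine. In C the wrapper amounts to the following sketch, where divu() stands in for __c6xabi_divu:

int divi(int a, int b)
{
	int negative = (a < 0) ^ (b < 0);   /* the XOR of the two cmpgt tests */
	unsigned int q = divu(a < 0 ? -(unsigned int)a : (unsigned int)a,
			      b < 0 ? -(unsigned int)b : (unsigned int)b);

	return negative ? -(int)q : (int)q;
}

The assembly below gets the same effect without a stack frame: it parks the return address in B5, conditionally negates the operands, and negates the quotient on the way out only when exactly one input was negative.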
- - .text -ENTRY(__c6xabi_divi) - call .s2 __c6xabi_divu -|| mv .d2 B3, B5 -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B1 - - [A1] neg .l1 A4, A4 -|| [B1] neg .l2 B4, B4 -|| xor .s1x A1, B1, A1 - [A1] addkpc .s2 _divu_ret, B3, 4 -_divu_ret: - neg .l1 A4, A4 -|| mv .l2 B3,B5 -|| ret .s2 B5 - nop 5 -ENDPROC(__c6xabi_divi) diff --git a/arch/c6x/lib/divremi.S b/arch/c6x/lib/divremi.S deleted file mode 100644 index 575fc57a8a7673e23eb9232c6940cb2d873bf6db..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divremi.S +++ /dev/null @@ -1,34 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text -ENTRY(__c6xabi_divremi) - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - - [B0] addkpc .s2 _divu_ret_1, B3, 1 - [!B0] addkpc .s2 _divu_ret_2, B3, 1 - nop 2 -_divu_ret_1: - neg .l1 A4, A4 -_divu_ret_2: - ldw .d2t2 *++B15[2], B3 - - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, A5 - nop 4 -ENDPROC(__c6xabi_divremi) diff --git a/arch/c6x/lib/divremu.S b/arch/c6x/lib/divremu.S deleted file mode 100644 index 5f6a6a2997ae9806004fab70590605a3fc8a66fa..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divremu.S +++ /dev/null @@ -1,75 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2011 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text -ENTRY(__c6xabi_divremu) - ;; We use a series of up to 31 subc instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the top of the register, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. Setting B4 to 1 - ;; is a trick to allow us to leave the following insns in the jump - ;; delay slot without affecting the result. - mv .s2x A4, B1 - - [b1] lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -||[!b1] mvk .d2 1, B4 - -||[!b1] zero .s1 A5 - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here.
- cmpltu .l1x A4, B4, A2 - [!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -__divremu0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 __divremu0 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 -|| extu .s1 A4, A6, A5 - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop -ENDPROC(__c6xabi_divremu) diff --git a/arch/c6x/lib/divu.S b/arch/c6x/lib/divu.S deleted file mode 100644 index f0f6082944c23917fdb3099d609f11dfbffe34b8..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/divu.S +++ /dev/null @@ -1,86 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - .text -ENTRY(__c6xabi_divu) - ;; We use a series of up to 31 subc instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the top of the register, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. - mv .s2x A4, B1 - [B1] lmbd .l2 1, B4, B1 -|| [!B1] b .s2 B3 ; RETURN A -|| [!B1] mvk .d2 1, B4 - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here.
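Both unsigned divides follow the scheme the comments describe: lmbd counts the divisor's leading zeros, the divisor is shifted up until its most significant bit reaches bit 31, and one conditional-subtract step runs per remaining bit. A C model of the value computed (this models the result, not the subc dataflow; the zero-divisor guard exists only to keep the model well defined, since division by zero is undefined for the assembly too):

unsigned int divu_model(unsigned int n, unsigned int d)
{
	unsigned int q = 0, steps;

	if (d == 0)
		return 0;
	steps = __builtin_clz(d) + 1;   /* like lmbd 1, d, plus the top step */
	d <<= steps - 1;                /* align the divisor MSB with bit 31 */
	while (steps--) {
		q <<= 1;
		if (n >= d) {           /* one restoring-division step */
			n -= d;
			q |= 1;
		}
		d >>= 1;
	}
	return q;                       /* n now holds the remainder */
}

For example, divu_model(100, 7) runs 30 steps and yields 14, with 2 left in n.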
- cmpltu .l1x A4, B4, A2 - [!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -_divu_loop: - cmpgt .l2 B1, 7, B0 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 -|| [B0] b .s1 _divu_loop - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop -ENDPROC(__c6xabi_divu) diff --git a/arch/c6x/lib/llshl.S b/arch/c6x/lib/llshl.S deleted file mode 100644 index 3272499618e0b2b4a2715c92758cc2648922ccbc..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshl.S +++ /dev/null @@ -1,25 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -;; uint64_t __c6xabi_llshl(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshl) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; just return if zero shift - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shru .s1 A4,A0,A0 - [!A2] neg .l1 A0,A5 -|| [A2] shl .s1 A5,A1,A5 - [!A2] shl .s1 A4,A5,A5 -|| [A2] or .d1 A5,A0,A5 -|| [!A2] mvk .l1 0,A4 - [A2] shl .s1 A4,A1,A4 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshl) diff --git a/arch/c6x/lib/llshr.S b/arch/c6x/lib/llshr.S deleted file mode 100644 index 6bfaacd15e73573efc7ef7154992e4057314bef0..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshr.S +++ /dev/null @@ -1,26 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -;; uint64_t __c6xabi_llshr(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshr) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; return if zero shift count - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shl .s1 A5,A0,A0 - nop - [!A2] neg .l1 A0,A4 -|| [A2] shru .s1 A4,A1,A4 - [!A2] shr .s1 A5,A4,A4 -|| [A2] or .d1 A4,A0,A4 - [!A2] shr .s1 A5,0x1f,A5 - [A2] shr .s1 A5,A1,A5 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshr) diff --git a/arch/c6x/lib/llshru.S b/arch/c6x/lib/llshru.S deleted file mode 100644 index 103128f50770abd88a9ff8970f9abd893c90bcd4..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/llshru.S +++ /dev/null @@ -1,26 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . 
-;; - -;; uint64_t __c6xabi_llshru(uint64_t val, uint shift) - -#include - - .text -ENTRY(__c6xabi_llshru) - mv .l1x B4,A1 - [!A1] b .s2 B3 ; return if zero shift count - mvk .s1 32,A0 - sub .d1 A0,A1,A0 - cmplt .l1 0,A0,A2 - [A2] shl .s1 A5,A0,A0 - nop - [!A2] neg .l1 A0,A4 -|| [A2] shru .s1 A4,A1,A4 - [!A2] shru .s1 A5,A4,A4 -|| [A2] or .d1 A4,A0,A4 -|| [!A2] mvk .l1 0,A5 - [A2] shru .s1 A5,A1,A5 - bnop .s2 B3,5 -ENDPROC(__c6xabi_llshru) diff --git a/arch/c6x/lib/memcpy_64plus.S b/arch/c6x/lib/memcpy_64plus.S deleted file mode 100644 index 157a30486bfd0837c330224643eeffb92ad4c191..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/memcpy_64plus.S +++ /dev/null @@ -1,43 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0-only -; Port on Texas Instruments TMS320C6x architecture -; -; Copyright (C) 2006, 2009, 2010 Texas Instruments Incorporated -; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) -; - -#include - - .text - -ENTRY(memcpy) - AND .L1 0x1,A6,A0 - || AND .S1 0x2,A6,A1 - || AND .L2X 0x4,A6,B0 - || MV .D1 A4,A3 - || MVC .S2 ILC,B2 - - [A0] LDB .D2T1 *B4++,A5 - [A1] LDB .D2T1 *B4++,A7 - [A1] LDB .D2T1 *B4++,A8 - [B0] LDNW .D2T1 *B4++,A9 - || SHRU .S2X A6,0x3,B1 - [!B1] BNOP .S2 B3,1 - - [A0] STB .D1T1 A5,*A3++ - ||[B1] MVC .S2 B1,ILC - [A1] STB .D1T1 A7,*A3++ - [A1] STB .D1T1 A8,*A3++ - [B0] STNW .D1T1 A9,*A3++ ; return when len < 8 - - SPLOOP 2 - - LDNDW .D2T1 *B4++,A9:A8 - NOP 3 - - NOP - SPKERNEL 0,0 - || STNDW .D1T1 A9:A8,*A3++ - - BNOP .S2 B3,4 - MVC .S2 B2,ILC -ENDPROC(memcpy) diff --git a/arch/c6x/lib/mpyll.S b/arch/c6x/lib/mpyll.S deleted file mode 100644 index d07c13ec4fd4c1a54c522b4d5be4b77b201d7946..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/mpyll.S +++ /dev/null @@ -1,37 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . -;; - -#include - - ;; uint64_t __c6xabi_mpyll(uint64_t x, uint64_t y) - ;; - ;; 64x64 multiply - ;; First compute partial results using 32-bit parts of x and y: - ;; - ;; b63 b32 b31 b0 - ;; ----------------------------- - ;; | 1 | 0 | - ;; ----------------------------- - ;; - ;; P0 = X0*Y0 - ;; P1 = X0*Y1 + X1*Y0 - ;; P2 = X1*Y1 - ;; - ;; result = (P2 << 64) + (P1 << 32) + P0 - ;; - ;; Since the result is also 64-bit, we can skip the P2 term. - - .text -ENTRY(__c6xabi_mpyll) - mpy32u .m1x A4,B4,A1:A0 ; X0*Y0 - b .s2 B3 - || mpy32u .m2x B5,A4,B1:B0 ; X0*Y1 (don't need upper 32-bits) - || mpy32u .m1x A5,B4,A3:A2 ; X1*Y0 (don't need upper 32-bits) - nop - nop - mv .s1 A0,A4 - add .l1x A2,B0,A5 - add .s1 A1,A5,A5 -ENDPROC(__c6xabi_mpyll) diff --git a/arch/c6x/lib/negll.S b/arch/c6x/lib/negll.S deleted file mode 100644 index 9ba434db5366308a1e0c2e19b2b8c3df503ae791..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/negll.S +++ /dev/null @@ -1,19 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright (C) 2010 Texas Instruments Incorporated -;; Contributed by Mark Salter . 
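The partial-product diagram in the __c6xabi_mpyll comment above maps directly onto C. A sketch (uses stdint.h types; since the result is truncated to 64 bits, the X1*Y1 term and the high halves of the cross products drop out, exactly as the comment says):

#include <stdint.h>

uint64_t mpyll(uint64_t x, uint64_t y)
{
	uint32_t x0 = (uint32_t)x, x1 = (uint32_t)(x >> 32);
	uint32_t y0 = (uint32_t)y, y1 = (uint32_t)(y >> 32);
	uint64_t p0 = (uint64_t)x0 * y0;     /* full 64-bit X0*Y0 */
	uint32_t p1 = x0 * y1 + x1 * y0;     /* only the low 32 bits matter */

	return p0 + ((uint64_t)p1 << 32);    /* X1*Y1 would shift out entirely */
}

This is why the assembly can use three mpy32u instructions and simply ignore the upper halves of two of the products.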
-;; - -;; int64_t __c6xabi_negll(int64_t val) - -#include - - .text -ENTRY(__c6xabi_negll) - b .s2 B3 - mvk .l1 0,A0 - subu .l1 A0,A4,A3:A2 - sub .l1 A0,A5,A0 -|| ext .s1 A3,24,24,A5 - add .l1 A5,A0,A5 - mv .s1 A2,A4 -ENDPROC(__c6xabi_negll) diff --git a/arch/c6x/lib/pop_rts.S b/arch/c6x/lib/pop_rts.S deleted file mode 100644 index f129e32943c57b370f5f3bcbc66ad92f10ba81a2..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/pop_rts.S +++ /dev/null @@ -1,20 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_pop_rts) - lddw .d2t2 *++B15, B3:B2 - lddw .d2t1 *++B15, A11:A10 - lddw .d2t2 *++B15, B11:B10 - lddw .d2t1 *++B15, A13:A12 - lddw .d2t2 *++B15, B13:B12 - lddw .d2t1 *++B15, A15:A14 -|| b .s2 B3 - ldw .d2t2 *++B15[2], B14 - nop 4 -ENDPROC(__c6xabi_pop_rts) diff --git a/arch/c6x/lib/push_rts.S b/arch/c6x/lib/push_rts.S deleted file mode 100644 index 40b0a4fe937c4014eaffed3cc3921f6f079721fd..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/push_rts.S +++ /dev/null @@ -1,19 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_push_rts) - stw .d2t2 B14, *B15--[2] - stdw .d2t1 A15:A14, *B15-- -|| b .s2x A3 - stdw .d2t2 B13:B12, *B15-- - stdw .d2t1 A13:A12, *B15-- - stdw .d2t2 B11:B10, *B15-- - stdw .d2t1 A11:A10, *B15-- - stdw .d2t2 B3:B2, *B15-- -ENDPROC(__c6xabi_push_rts) diff --git a/arch/c6x/lib/remi.S b/arch/c6x/lib/remi.S deleted file mode 100644 index 96a1335eac202ddd9d42e278ce841cf2c410620a..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/remi.S +++ /dev/null @@ -1,52 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - .text - -ENTRY(__c6xabi_remi) - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - - [B0] addkpc .s2 _divu_ret_1, B3, 1 - [!B0] addkpc .s2 _divu_ret_2, B3, 1 - nop 2 -_divu_ret_1: - neg .l1 A4, A4 -_divu_ret_2: - ldw .d2t2 *++B15[2], B3 - - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, A4 - nop 4 -ENDPROC(__c6xabi_remi) diff --git a/arch/c6x/lib/remu.S b/arch/c6x/lib/remu.S deleted file mode 100644 index 428feb9c06c06616c5fd1847235f8bd04a3f7761..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/remu.S +++ /dev/null @@ -1,70 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt .
-;; - -#include - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 on the stack. - - - .text - -ENTRY(__c6xabi_remu) - ;; The ABI seems designed to prevent these functions calling each other, - ;; so we duplicate most of the divsi3 code here. - mv .s2x A4, B1 - lmbd .l2 1, B4, B1 -|| [!B1] b .s2 B3 ; RETURN A -|| [!B1] mvk .d2 1, B4 - - mv .l1x B1, A7 -|| shl .s2 B4, B1, B4 - - cmpltu .l1x A4, B4, A1 - [!A1] sub .l1x A4, B4, A4 - shru .s2 B4, 1, B4 - -_remu_loop: - cmpgt .l2 B1, 7, B0 -|| [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; RETURN A may happen here (note: must happen before the next branch) - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 -|| [B0] b .s1 _remu_loop - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 - [B1] subc .l1x A4,B4,A4 -|| [B1] add .s2 -1, B1, B1 - [B1] subc .l1x A4,B4,A4 - - extu .s1 A4, A7, A4 - nop 2 -ENDPROC(__c6xabi_remu) diff --git a/arch/c6x/lib/strasgi.S b/arch/c6x/lib/strasgi.S deleted file mode 100644 index 715aeb2007924d0ab4584f294f5a87cde95cd8e4..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/strasgi.S +++ /dev/null @@ -1,77 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_strasgi) - ;; This is essentially memcpy, with alignment known to be at least - ;; 4, and the size a multiple of 4 greater than or equal to 28.
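The contract in the comment above makes __c6xabi_strasgi a compiler-assisted word copy. Under the stated preconditions it is equivalent to this C sketch:

void strasgi(unsigned int *dst, const unsigned int *src, unsigned int nbytes)
{
	/* caller guarantees: both pointers at least 4-byte aligned,
	 * nbytes a multiple of 4, and nbytes >= 28
	 */
	unsigned int i;

	for (i = 0; i < nbytes / 4; i++)
		dst[i] = src[i];
}

Knowing the minimum size is what lets the assembly software-pipeline the loop: six words are preloaded before the store loop starts, and the trailing stores drain them afterwards.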
- ldw .d2t1 *B4++, A0 -|| mvk .s2 16, B1 - ldw .d2t1 *B4++, A1 -|| mvk .s2 20, B2 -|| sub .d1 A6, 24, A6 - ldw .d2t1 *B4++, A5 - ldw .d2t1 *B4++, A7 -|| mv .l2x A6, B7 - ldw .d2t1 *B4++, A8 - ldw .d2t1 *B4++, A9 -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - -_strasgi_loop: - stw .d1t2 B5, *A4++ -|| [B0] ldw .d2t1 *B4++, A0 -|| mv .s2x A1, B5 -|| mv .l2 B7, B6 - - [B0] sub .d2 B6, 24, B7 -|| [B0] b .s2 _strasgi_loop -|| cmpltu .l2 B1, B6, B0 - - [B0] ldw .d2t1 *B4++, A1 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A5, B5 -|| cmpltu .l2 12, B6, B0 - - [B0] ldw .d2t1 *B4++, A5 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A7, B5 -|| cmpltu .l2 8, B6, B0 - - [B0] ldw .d2t1 *B4++, A7 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A8, B5 -|| cmpltu .l2 4, B6, B0 - - [B0] ldw .d2t1 *B4++, A8 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A9, B5 -|| cmpltu .l2 0, B6, B0 - - [B0] ldw .d2t1 *B4++, A9 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - - ;; loop back branch happens here - - cmpltu .l2 B1, B6, B0 -|| ret .s2 b3 - - [B0] stw .d1t1 A1, *A4++ -|| cmpltu .l2 12, B6, B0 - [B0] stw .d1t1 A5, *A4++ -|| cmpltu .l2 8, B6, B0 - [B0] stw .d1t1 A7, *A4++ -|| cmpltu .l2 4, B6, B0 - [B0] stw .d1t1 A8, *A4++ -|| cmpltu .l2 0, B6, B0 - [B0] stw .d1t1 A9, *A4++ - - ;; return happens here -ENDPROC(__c6xabi_strasgi) diff --git a/arch/c6x/lib/strasgi_64plus.S b/arch/c6x/lib/strasgi_64plus.S deleted file mode 100644 index d10aa2dc32498b6b8b62f19e11aafe3388603e1e..0000000000000000000000000000000000000000 --- a/arch/c6x/lib/strasgi_64plus.S +++ /dev/null @@ -1,27 +0,0 @@ -;; SPDX-License-Identifier: GPL-2.0-or-later -;; Copyright 2010 Free Software Foundation, Inc. -;; Contributed by Bernd Schmidt . -;; - -#include - - .text - -ENTRY(__c6xabi_strasgi_64plus) - shru .s2x a6, 2, b31 -|| mv .s1 a4, a30 -|| mv .d2 b4, b30 - - add .s2 -4, b31, b31 - - sploopd 1 -|| mvc .s2 b31, ilc - ldw .d2t2 *b30++, b31 - nop 4 - mv .s1x b31,a31 - spkernel 6, 0 -|| stw .d1t1 a31, *a30++ - - ret .s2 b3 - nop 5 -ENDPROC(__c6xabi_strasgi_64plus) diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S deleted file mode 100644 index 5078eb5169faebb1b9c5b62fbcb29411494b8664..0000000000000000000000000000000000000000 --- a/arch/csky/abiv1/memcpy.S +++ /dev/null @@ -1,347 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
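A note on the copy loops below: when the source is misaligned, the code loads aligned words and stitches neighbouring pairs together with the GET_FRONT_BITS/GET_AFTER_BITS macros defined just after this header. For a 1-byte offset on a little-endian core, the combination they compute is, as a sketch:

unsigned int stitch1(unsigned int prev, unsigned int next)
{
	/* GET_FRONT_BITS prev,8 then GET_AFTER_BITS next,24, OR'd together:
	 * the top three bytes of prev joined with the low byte of next
	 */
	return (prev >> 8) | (next << 24);
}

The 2- and 3-byte cases use shift pairs of 16/16 and 24/8; on big-endian parts the shift directions swap, which is all the __cskyLE__ conditional in the macros does.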
- -#include - -.macro GET_FRONT_BITS rx y -#ifdef __cskyLE__ - lsri \rx, \y -#else - lsli \rx, \y -#endif -.endm - -.macro GET_AFTER_BITS rx y -#ifdef __cskyLE__ - lsli \rx, \y -#else - lsri \rx, \y -#endif -.endm - -/* void *memcpy(void *dest, const void *src, size_t n); */ -ENTRY(memcpy) - mov r7, r2 - cmplti r4, 4 - bt .L_copy_by_byte - mov r6, r2 - andi r6, 3 - cmpnei r6, 0 - jbt .L_dest_not_aligned - mov r6, r3 - andi r6, 3 - cmpnei r6, 0 - jbt .L_dest_aligned_but_src_not_aligned -.L0: - cmplti r4, 16 - jbt .L_aligned_and_len_less_16bytes - subi sp, 8 - stw r8, (sp, 0) -.L_aligned_and_len_larger_16bytes: - ldw r1, (r3, 0) - ldw r5, (r3, 4) - ldw r8, (r3, 8) - stw r1, (r7, 0) - ldw r1, (r3, 12) - stw r5, (r7, 4) - stw r8, (r7, 8) - stw r1, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L_aligned_and_len_larger_16bytes - ldw r8, (sp, 0) - addi sp, 8 - cmpnei r4, 0 - jbf .L_return - -.L_aligned_and_len_less_16bytes: - cmplti r4, 4 - bt .L_copy_by_byte -.L1: - ldw r1, (r3, 0) - stw r1, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - jbf .L1 - br .L_copy_by_byte - -.L_return: - rts - -.L_copy_by_byte: /* len less than 4 bytes */ - cmpnei r4, 0 - jbf .L_return -.L4: - ldb r1, (r3, 0) - stb r1, (r7, 0) - addi r3, 1 - addi r7, 1 - decne r4 - jbt .L4 - rts - -/* - * If dest is not aligned, just copying some bytes makes the dest align. - * After that, we check whether the src is aligned. - */ -.L_dest_not_aligned: - mov r5, r3 - rsub r5, r5, r7 - abs r5, r5 - cmplt r5, r4 - bt .L_copy_by_byte - mov r5, r7 - sub r5, r3 - cmphs r5, r4 - bf .L_copy_by_byte - mov r5, r6 -.L5: - ldb r1, (r3, 0) /* makes the dest align. */ - stb r1, (r7, 0) - addi r5, 1 - subi r4, 1 - addi r3, 1 - addi r7, 1 - cmpnei r5, 4 - jbt .L5 - cmplti r4, 4 - jbt .L_copy_by_byte - mov r6, r3 /* check whether the src is aligned. */ - andi r6, 3 - cmpnei r6, 0 - jbf .L0 - -/* Check the misalignment: 1, 2 or 3 bytes? */ -.L_dest_aligned_but_src_not_aligned: - mov r5, r3 - rsub r5, r5, r7 - abs r5, r5 - cmplt r5, r4 - bt .L_copy_by_byte - bclri r3, 0 - bclri r3, 1 - ldw r1, (r3, 0) - addi r3, 4 - cmpnei r6, 2 - bf .L_dest_aligned_but_src_not_aligned_2bytes - cmpnei r6, 3 - bf .L_dest_aligned_but_src_not_aligned_3bytes - -.L_dest_aligned_but_src_not_aligned_1byte: - mov r5, r7 - sub r5, r3 - cmphs r5, r4 - bf .L_copy_by_byte - cmplti r4, 16 - bf .L11 -.L10: /* If the len is less than 16 bytes */ - GET_FRONT_BITS r1 8 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 24 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L10 - subi r3, 3 - br .L_copy_by_byte -.L11: - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) -.L12: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 8 /* little or big endian?
*/ - mov r10, r5 - GET_AFTER_BITS r5 24 - or r5, r1 - - GET_FRONT_BITS r10 8 - mov r1, r11 - GET_AFTER_BITS r11 24 - or r11, r10 - - GET_FRONT_BITS r1 8 - mov r10, r8 - GET_AFTER_BITS r8 24 - or r8, r1 - - GET_FRONT_BITS r10 8 - mov r1, r9 - GET_AFTER_BITS r9 24 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L12 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp , 16 - cmplti r4, 4 - bf .L10 - subi r3, 3 - br .L_copy_by_byte - -.L_dest_aligned_but_src_not_aligned_2bytes: - cmplti r4, 16 - bf .L21 -.L20: - GET_FRONT_BITS r1 16 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 16 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L20 - subi r3, 2 - br .L_copy_by_byte - rts - -.L21: /* n > 16 */ - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) - -.L22: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 16 - mov r10, r5 - GET_AFTER_BITS r5 16 - or r5, r1 - - GET_FRONT_BITS r10 16 - mov r1, r11 - GET_AFTER_BITS r11 16 - or r11, r10 - - GET_FRONT_BITS r1 16 - mov r10, r8 - GET_AFTER_BITS r8 16 - or r8, r1 - - GET_FRONT_BITS r10 16 - mov r1, r9 - GET_AFTER_BITS r9 16 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L22 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp, 16 - cmplti r4, 4 - bf .L20 - subi r3, 2 - br .L_copy_by_byte - - -.L_dest_aligned_but_src_not_aligned_3bytes: - cmplti r4, 16 - bf .L31 -.L30: - GET_FRONT_BITS r1 24 - mov r5, r1 - ldw r6, (r3, 0) - mov r1, r6 - GET_AFTER_BITS r6 8 - or r5, r6 - stw r5, (r7, 0) - subi r4, 4 - addi r3, 4 - addi r7, 4 - cmplti r4, 4 - bf .L30 - subi r3, 1 - br .L_copy_by_byte -.L31: - subi sp, 16 - stw r8, (sp, 0) - stw r9, (sp, 4) - stw r10, (sp, 8) - stw r11, (sp, 12) -.L32: - ldw r5, (r3, 0) - ldw r11, (r3, 4) - ldw r8, (r3, 8) - ldw r9, (r3, 12) - - GET_FRONT_BITS r1 24 - mov r10, r5 - GET_AFTER_BITS r5 8 - or r5, r1 - - GET_FRONT_BITS r10 24 - mov r1, r11 - GET_AFTER_BITS r11 8 - or r11, r10 - - GET_FRONT_BITS r1 24 - mov r10, r8 - GET_AFTER_BITS r8 8 - or r8, r1 - - GET_FRONT_BITS r10 24 - mov r1, r9 - GET_AFTER_BITS r9 8 - or r9, r10 - - stw r5, (r7, 0) - stw r11, (r7, 4) - stw r8, (r7, 8) - stw r9, (r7, 12) - subi r4, 16 - addi r3, 16 - addi r7, 16 - cmplti r4, 16 - jbf .L32 - ldw r8, (sp, 0) - ldw r9, (sp, 4) - ldw r10, (sp, 8) - ldw r11, (sp, 12) - addi sp, 16 - cmplti r4, 4 - bf .L30 - subi r3, 1 - br .L_copy_by_byte diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S deleted file mode 100644 index 326402e65f9e0bc610215eb63a78fd4dbaa54e55..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/mcount.S +++ /dev/null @@ -1,159 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
- -#include -#include - -/* - * csky-gcc with -pg will put the following asm after prologue: - * push r15 - * jsri _mcount - * - * stack layout after mcount_enter in _mcount(): - * - * current sp => 0:+-------+ - * | a0-a3 | -> must save all argument regs - * +16:+-------+ - * | lr | -> _mcount lr (instrumented function's pc) - * +20:+-------+ - * | fp=r8 | -> instrumented function fp - * +24:+-------+ - * | plr | -> instrumented function lr (parent's pc) - * +-------+ - */ - -.macro mcount_enter - subi sp, 24 - stw a0, (sp, 0) - stw a1, (sp, 4) - stw a2, (sp, 8) - stw a3, (sp, 12) - stw lr, (sp, 16) - stw r8, (sp, 20) -.endm - -.macro mcount_exit - ldw a0, (sp, 0) - ldw a1, (sp, 4) - ldw a2, (sp, 8) - ldw a3, (sp, 12) - ldw t1, (sp, 16) - ldw r8, (sp, 20) - ldw lr, (sp, 24) - addi sp, 28 - jmp t1 -.endm - -.macro save_return_regs - subi sp, 16 - stw a0, (sp, 0) - stw a1, (sp, 4) - stw a2, (sp, 8) - stw a3, (sp, 12) -.endm - -.macro restore_return_regs - mov lr, a0 - ldw a0, (sp, 0) - ldw a1, (sp, 4) - ldw a2, (sp, 8) - ldw a3, (sp, 12) - addi sp, 16 -.endm - -.macro nop32_stub - nop32 - nop32 - nop32 -.endm - -ENTRY(ftrace_stub) - jmp lr -END(ftrace_stub) - -#ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(_mcount) - mcount_enter - - /* r26 is link register, only used with jsri translation */ - lrw r26, ftrace_trace_function - ldw r26, (r26, 0) - lrw a1, ftrace_stub - cmpne r26, a1 - bf skip_ftrace - - mov a0, lr - subi a0, 4 - ldw a1, (sp, 24) - - jsr r26 - -#ifndef CONFIG_FUNCTION_GRAPH_TRACER -skip_ftrace: - mcount_exit -#else -skip_ftrace: - lrw a0, ftrace_graph_return - ldw a0, (a0, 0) - lrw a1, ftrace_stub - cmpne a0, a1 - bt ftrace_graph_caller - - lrw a0, ftrace_graph_entry - ldw a0, (a0, 0) - lrw a1, ftrace_graph_entry_stub - cmpne a0, a1 - bt ftrace_graph_caller - - mcount_exit -#endif -END(_mcount) -#else /* CONFIG_DYNAMIC_FTRACE */ -ENTRY(_mcount) - mov t1, lr - ldw lr, (sp, 0) - addi sp, 4 - jmp t1 -ENDPROC(_mcount) - -ENTRY(ftrace_caller) - mcount_enter - - ldw a0, (sp, 16) - subi a0, 4 - ldw a1, (sp, 24) - - nop -GLOBAL(ftrace_call) - nop32_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - nop -GLOBAL(ftrace_graph_call) - nop32_stub -#endif - - mcount_exit -ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - mov a0, sp - addi a0, 24 - ldw a1, (sp, 16) - subi a1, 4 - mov a2, r8 - lrw r26, prepare_ftrace_return - jsr r26 - mcount_exit -END(ftrace_graph_caller) - -ENTRY(return_to_handler) - save_return_regs - mov a0, r8 - jsri ftrace_return_to_handler - restore_return_regs - jmp lr -END(return_to_handler) -#endif diff --git a/arch/csky/abiv2/memcmp.S b/arch/csky/abiv2/memcmp.S deleted file mode 100644 index bf0d809f09e225fd9600993f893ea8de7394a60c..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memcmp.S +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(memcmp) - /* Test if len less than 4 bytes. */ - mov r3, r0 - movi r0, 0 - mov r12, r4 - cmplti r2, 4 - bt .L_compare_by_byte - - andi r13, r0, 3 - movi r19, 4 - - /* Test if s1 is not 4 bytes aligned. */ - bnez r13, .L_s1_not_aligned - - LABLE_ALIGN -.L_s1_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_compare_by_word - -.L_compare_by_4word: - /* If aligned, load word each time.
*/ - ldw r20, (r3, 0) - ldw r21, (r1, 0) - /* If s1[i] != s2[i], goto .L_byte_check. */ - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 4) - ldw r21, (r1, 4) - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 8) - ldw r21, (r1, 8) - cmpne r20, r21 - bt .L_byte_check - - ldw r20, (r3, 12) - ldw r21, (r1, 12) - cmpne r20, r21 - bt .L_byte_check - - PRE_BNEZAD (r18) - addi a3, 16 - addi a1, 16 - - BNEZAD (r18, .L_compare_by_4word) - -.L_compare_by_word: - zext r18, r2, 3, 2 - bez r18, .L_compare_by_byte -.L_compare_by_word_loop: - ldw r20, (r3, 0) - ldw r21, (r1, 0) - addi r3, 4 - PRE_BNEZAD (r18) - cmpne r20, r21 - addi r1, 4 - bt .L_byte_check - BNEZAD (r18, .L_compare_by_word_loop) - -.L_compare_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_compare_by_byte_loop: - ldb r0, (r3, 0) - ldb r4, (r1, 0) - addi r3, 1 - subu r0, r4 - PRE_BNEZAD (r18) - addi r1, 1 - bnez r0, .L_return - BNEZAD (r18, .L_compare_by_byte_loop) - -.L_return: - mov r4, r12 - rts - -# ifdef __CSKYBE__ -/* d[i] != s[i] in word, so we check byte 0. */ -.L_byte_check: - xtrb0 r0, r20 - xtrb0 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 1 */ - xtrb1 r0, r20 - xtrb1 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 2 */ - xtrb2 r0, r20 - xtrb2 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 3 */ - xtrb3 r0, r20 - xtrb3 r2, r21 - subu r0, r2 -# else -/* s1[i] != s2[i] in word, so we check byte 3. */ -.L_byte_check: - xtrb3 r0, r20 - xtrb3 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 2 */ - xtrb2 r0, r20 - xtrb2 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 1 */ - xtrb1 r0, r20 - xtrb1 r2, r21 - subu r0, r2 - bnez r0, .L_return - - /* check byte 0 */ - xtrb0 r0, r20 - xtrb0 r2, r21 - subu r0, r2 - br .L_return -# endif /* !__CSKYBE__ */ - -/* Compare when s1 is not aligned. */ -.L_s1_not_aligned: - sub r13, r19, r13 - sub r2, r13 -.L_s1_not_aligned_loop: - ldb r0, (r3, 0) - ldb r4, (r1, 0) - addi r3, 1 - subu r0, r4 - PRE_BNEZAD (r13) - addi r1, 1 - bnez r0, .L_return - BNEZAD (r13, .L_s1_not_aligned_loop) - br .L_s1_aligned -ENDPROC(memcmp) diff --git a/arch/csky/abiv2/memcpy.S b/arch/csky/abiv2/memcpy.S deleted file mode 100644 index 145bf3a9360ee18636a4c66862ef34d4591e14fa..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memcpy.S +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(__memcpy) -ENTRY(memcpy) - /* Test if len less than 4 bytes. */ - mov r12, r0 - cmplti r2, 4 - bt .L_copy_by_byte - - andi r13, r0, 3 - movi r19, 4 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - -/* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - - /* Test if len less than 16 bytes. 
*/ - bez r18, .L_len_less_16bytes - movi r19, 0 - - LABLE_ALIGN -.L_len_larger_16bytes: -#if defined(__CK860__) - ldw r3, (r1, 0) - stw r3, (r0, 0) - ldw r3, (r1, 4) - stw r3, (r0, 4) - ldw r3, (r1, 8) - stw r3, (r0, 8) - ldw r3, (r1, 12) - addi r1, 16 - stw r3, (r0, 12) - addi r0, 16 -#else - ldw r20, (r1, 0) - ldw r21, (r1, 4) - ldw r22, (r1, 8) - ldw r23, (r1, 12) - stw r20, (r0, 0) - stw r21, (r0, 4) - stw r22, (r0, 8) - stw r23, (r0, 12) - PRE_BNEZAD (r18) - addi r1, 16 - addi r0, 16 -#endif - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - bez r18, .L_copy_by_byte -.L_len_less_16bytes_loop: - ldw r3, (r1, 0) - PRE_BNEZAD (r18) - addi r1, 4 - stw r3, (r0, 0) - addi r0, 4 - BNEZAD (r18, .L_len_less_16bytes_loop) - -/* Test if len less than 4 bytes. */ -.L_copy_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_copy_by_byte_loop: - ldb r3, (r1, 0) - PRE_BNEZAD (r18) - addi r1, 1 - stb r3, (r0, 0) - addi r0, 1 - BNEZAD (r18, .L_copy_by_byte_loop) - -.L_return: - mov r0, r12 - rts - -/* - * If dest is not aligned, just copying some bytes makes the - * dest align. - */ -.L_dest_not_aligned: - sub r13, r19, r13 - sub r2, r13 - -/* Makes the dest align. */ -.L_dest_not_aligned_loop: - ldb r3, (r1, 0) - PRE_BNEZAD (r13) - addi r1, 1 - stb r3, (r0, 0) - addi r0, 1 - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 4 - bt .L_copy_by_byte - - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(__memcpy) diff --git a/arch/csky/abiv2/memmove.S b/arch/csky/abiv2/memmove.S deleted file mode 100644 index 5721e73ad3d8f4185059952a903337be80698d36..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memmove.S +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - - .weak memmove -ENTRY(__memmove) -ENTRY(memmove) - subu r3, r0, r1 - cmphs r3, r2 - bt memcpy - - mov r12, r0 - addu r0, r0, r2 - addu r1, r1, r2 - - /* Test if len less than 4 bytes. */ - cmplti r2, 4 - bt .L_copy_by_byte - - andi r13, r0, 3 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - /* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_len_less_16bytes - movi r19, 0 - - /* len > 16 bytes */ - LABLE_ALIGN -.L_len_larger_16bytes: - subi r1, 16 - subi r0, 16 -#if defined(__CK860__) - ldw r3, (r1, 12) - stw r3, (r0, 12) - ldw r3, (r1, 8) - stw r3, (r0, 8) - ldw r3, (r1, 4) - stw r3, (r0, 4) - ldw r3, (r1, 0) - stw r3, (r0, 0) -#else - ldw r20, (r1, 0) - ldw r21, (r1, 4) - ldw r22, (r1, 8) - ldw r23, (r1, 12) - stw r20, (r0, 0) - stw r21, (r0, 4) - stw r22, (r0, 8) - stw r23, (r0, 12) - PRE_BNEZAD (r18) -#endif - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - bez r18, .L_copy_by_byte -.L_len_less_16bytes_loop: - subi r1, 4 - subi r0, 4 - ldw r3, (r1, 0) - PRE_BNEZAD (r18) - stw r3, (r0, 0) - BNEZAD (r18, .L_len_less_16bytes_loop) - - /* Test if len less than 4 bytes. */ -.L_copy_by_byte: - zext r18, r2, 1, 0 - bez r18, .L_return -.L_copy_by_byte_loop: - subi r1, 1 - subi r0, 1 - ldb r3, (r1, 0) - PRE_BNEZAD (r18) - stb r3, (r0, 0) - BNEZAD (r18, .L_copy_by_byte_loop) - -.L_return: - mov r0, r12 - rts - - /* If dest is not aligned, just copy some bytes makes the dest - align. 
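memmove above opens with subu/cmphs/bt: if the destination does not start inside the source window, it simply tail-calls memcpy; otherwise it copies from the top down so overlapping bytes are read before they are overwritten. The test is one unsigned comparison in C (memmove_model is an illustrative name, and the byte-wise backward loop stands in for the word-wise one above):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

void *memmove_model(void *dst, const void *src, size_t n)
{
	/* subu r3, r0, r1; cmphs r3, r2; bt memcpy: unsigned wraparound
	 * makes dst < src land in the "no destructive overlap" case too. */
	if ((uintptr_t)dst - (uintptr_t)src >= n)
		return memcpy(dst, src, n);

	unsigned char *d = (unsigned char *)dst + n;
	const unsigned char *s = (const unsigned char *)src + n;
	while (n--)
		*--d = *--s;	/* highest address first */
	return dst;
}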
*/ -.L_dest_not_aligned: - sub r2, r13 -.L_dest_not_aligned_loop: - subi r1, 1 - subi r0, 1 - /* Makes the dest align. */ - ldb r3, (r1, 0) - PRE_BNEZAD (r13) - stb r3, (r0, 0) - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 4 - bt .L_copy_by_byte - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(memmove) -ENDPROC(__memmove) diff --git a/arch/csky/abiv2/memset.S b/arch/csky/abiv2/memset.S deleted file mode 100644 index a7e7d994b667d48916aa696e2803c7f93e7346b3..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/memset.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - - .weak memset -ENTRY(__memset) -ENTRY(memset) - /* Test if len less than 4 bytes. */ - mov r12, r0 - cmplti r2, 8 - bt .L_set_by_byte - - andi r13, r0, 3 - movi r19, 4 - /* Test if dest is not 4 bytes aligned. */ - bnez r13, .L_dest_not_aligned - /* Hardware can handle unaligned access directly. */ -.L_dest_aligned: - zextb r3, r1 - lsli r1, 8 - or r1, r3 - lsli r3, r1, 16 - or r3, r1 - - /* If dest is aligned, then copy. */ - zext r18, r2, 31, 4 - /* Test if len less than 16 bytes. */ - bez r18, .L_len_less_16bytes - - LABLE_ALIGN -.L_len_larger_16bytes: - stw r3, (r0, 0) - stw r3, (r0, 4) - stw r3, (r0, 8) - stw r3, (r0, 12) - PRE_BNEZAD (r18) - addi r0, 16 - BNEZAD (r18, .L_len_larger_16bytes) - -.L_len_less_16bytes: - zext r18, r2, 3, 2 - andi r2, 3 - bez r18, .L_set_by_byte -.L_len_less_16bytes_loop: - stw r3, (r0, 0) - PRE_BNEZAD (r18) - addi r0, 4 - BNEZAD (r18, .L_len_less_16bytes_loop) - - /* Test if len less than 4 bytes. */ -.L_set_by_byte: - zext r18, r2, 2, 0 - bez r18, .L_return -.L_set_by_byte_loop: - stb r1, (r0, 0) - PRE_BNEZAD (r18) - addi r0, 1 - BNEZAD (r18, .L_set_by_byte_loop) - -.L_return: - mov r0, r12 - rts - - /* If dest is not aligned, just set some bytes makes the dest - align. */ - -.L_dest_not_aligned: - sub r13, r19, r13 - sub r2, r13 -.L_dest_not_aligned_loop: - /* Makes the dest align. */ - stb r1, (r0, 0) - PRE_BNEZAD (r13) - addi r0, 1 - BNEZAD (r13, .L_dest_not_aligned_loop) - cmplti r2, 8 - bt .L_set_by_byte - /* Check whether the src is aligned. */ - jbr .L_dest_aligned -ENDPROC(memset) -ENDPROC(__memset) diff --git a/arch/csky/abiv2/strcmp.S b/arch/csky/abiv2/strcmp.S deleted file mode 100644 index f8403f4d8c2beb75b91da2e97c7d5161182ef7b3..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strcmp.S +++ /dev/null @@ -1,168 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strcmp) - mov a3, a0 - /* Check if the s1 addr is aligned. */ - xor a2, a3, a1 - andi a2, 0x3 - bnez a2, 7f - andi t1, a0, 0x3 - bnez t1, 5f - -1: - /* If aligned, load word each time. */ - ldw t0, (a3, 0) - ldw t1, (a1, 0) - /* If s1[i] != s2[i], goto 2f. */ - cmpne t0, t1 - bt 2f - /* If s1[i] == s2[i], check if s1 or s2 is at the end. */ - tstnbz t0 - /* If at the end, goto 3f (finish comparing). 
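The strcmp loop above hinges on tstnbz, which tests whether every byte of a word is nonzero, so a single load both compares four characters and checks for the terminator. A hedged C model, with the classic carry-propagation mask standing in for tstnbz (strcmp_model and has_zero_byte are illustrative names):

#include <stdint.h>
#include <string.h>

/* Software stand-in for tstnbz: true iff some byte of x is zero. */
static int has_zero_byte(uint32_t x)
{
	return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

int strcmp_model(const char *s1, const char *s2)
{
	/* The word path is taken only when s1 and s2 share the same
	 * 4-byte phase (the xor/andi test at entry); s1 is byte-aligned
	 * first, as the "5:" loop does. */
	if ((((uintptr_t)s1 ^ (uintptr_t)s2) & 3) == 0) {
		while ((uintptr_t)s1 & 3) {
			if (*s1 != *s2 || !*s1)
				return (unsigned char)*s1 - (unsigned char)*s2;
			s1++; s2++;
		}
		for (;;) {
			uint32_t a, b;
			memcpy(&a, s1, 4);	/* ldw t0, (a3, 0) */
			memcpy(&b, s2, 4);	/* ldw t1, (a1, 0) */
			if (a != b || has_zero_byte(a))
				break;		/* settle it per byte */
			s1 += 4; s2 += 4;
		}
	}
	while (*s1 && *s1 == *s2) {
		s1++; s2++;
	}
	return (unsigned char)*s1 - (unsigned char)*s2;
}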
*/ - bf 3f - - ldw t0, (a3, 4) - ldw t1, (a1, 4) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 8) - ldw t1, (a1, 8) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 12) - ldw t1, (a1, 12) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 16) - ldw t1, (a1, 16) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 20) - ldw t1, (a1, 20) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 24) - ldw t1, (a1, 24) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - ldw t0, (a3, 28) - ldw t1, (a1, 28) - cmpne t0, t1 - bt 2f - tstnbz t0 - bf 3f - - addi a3, 32 - addi a1, 32 - - br 1b - -# ifdef __CSKYBE__ - /* d[i] != s[i] in word, so we check byte 0. */ -2: - xtrb0 a0, t0 - xtrb0 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 1 */ - xtrb1 a0, t0 - xtrb1 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 2 */ - xtrb2 a0, t0 - xtrb2 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 3 */ - xtrb3 a0, t0 - xtrb3 a2, t1 - subu a0, a2 -# else - /* s1[i] != s2[i] in word, so we check byte 3. */ -2: - xtrb3 a0, t0 - xtrb3 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 2 */ - xtrb2 a0, t0 - xtrb2 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 1 */ - xtrb1 a0, t0 - xtrb1 a2, t1 - subu a0, a2 - bez a2, 4f - bnez a0, 4f - - /* check byte 0 */ - xtrb0 a0, t0 - xtrb0 a2, t1 - subu a0, a2 - -# endif /* !__CSKYBE__ */ - jmp lr -3: - movi a0, 0 -4: - jmp lr - - /* Compare when s1 or s2 is not aligned. */ -5: - subi t1, 4 -6: - ldb a0, (a3, 0) - ldb a2, (a1, 0) - subu a0, a2 - bez a2, 4b - bnez a0, 4b - addi t1, 1 - addi a1, 1 - addi a3, 1 - bnez t1, 6b - br 1b - -7: - ldb a0, (a3, 0) - addi a3, 1 - ldb a2, (a1, 0) - addi a1, 1 - subu a0, a2 - bnez a0, 4b - bnez a2, 7b - jmp r15 -ENDPROC(strcmp) diff --git a/arch/csky/abiv2/strcpy.S b/arch/csky/abiv2/strcpy.S deleted file mode 100644 index 3c6d3f6a573a1edcfb6efd22f61216a8b157ef03..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strcpy.S +++ /dev/null @@ -1,123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strcpy) - mov a3, a0 - /* Check if the src addr is aligned. */ - andi t0, a1, 3 - bnez t0, 11f -1: - /* Check if all the bytes in the word are not zero. 
*/ - ldw a2, (a1) - tstnbz a2 - bf 9f - stw a2, (a3) - - ldw a2, (a1, 4) - tstnbz a2 - bf 2f - stw a2, (a3, 4) - - ldw a2, (a1, 8) - tstnbz a2 - bf 3f - stw a2, (a3, 8) - - ldw a2, (a1, 12) - tstnbz a2 - bf 4f - stw a2, (a3, 12) - - ldw a2, (a1, 16) - tstnbz a2 - bf 5f - stw a2, (a3, 16) - - ldw a2, (a1, 20) - tstnbz a2 - bf 6f - stw a2, (a3, 20) - - ldw a2, (a1, 24) - tstnbz a2 - bf 7f - stw a2, (a3, 24) - - ldw a2, (a1, 28) - tstnbz a2 - bf 8f - stw a2, (a3, 28) - - addi a3, 32 - addi a1, 32 - br 1b - - -2: - addi a3, 4 - br 9f - -3: - addi a3, 8 - br 9f - -4: - addi a3, 12 - br 9f - -5: - addi a3, 16 - br 9f - -6: - addi a3, 20 - br 9f - -7: - addi a3, 24 - br 9f - -8: - addi a3, 28 -9: -# ifdef __CSKYBE__ - xtrb0 t0, a2 - st.b t0, (a3) - bez t0, 10f - xtrb1 t0, a2 - st.b t0, (a3, 1) - bez t0, 10f - xtrb2 t0, a2 - st.b t0, (a3, 2) - bez t0, 10f - stw a2, (a3) -# else - xtrb3 t0, a2 - st.b t0, (a3) - bez t0, 10f - xtrb2 t0, a2 - st.b t0, (a3, 1) - bez t0, 10f - xtrb1 t0, a2 - st.b t0, (a3, 2) - bez t0, 10f - stw a2, (a3) -# endif /* !__CSKYBE__ */ -10: - jmp lr - -11: - subi t0, 4 -12: - ld.b a2, (a1) - st.b a2, (a3) - bez a2, 10b - addi t0, 1 - addi a1, a1, 1 - addi a3, a3, 1 - bnez t0, 12b - jbr 1b -ENDPROC(strcpy) diff --git a/arch/csky/abiv2/strlen.S b/arch/csky/abiv2/strlen.S deleted file mode 100644 index bcdd70764d086441eeb80414e37f99e4d4781eb6..0000000000000000000000000000000000000000 --- a/arch/csky/abiv2/strlen.S +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include "sysdep.h" - -ENTRY(strlen) - /* Check if the start addr is aligned. */ - mov r3, r0 - andi r1, r0, 3 - movi r2, 4 - movi r0, 0 - bnez r1, .L_start_not_aligned - - LABLE_ALIGN -.L_start_addr_aligned: - /* Check if all the bytes in the word are not zero. */ - ldw r1, (r3) - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 4) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 8) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 12) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 16) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 20) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 24) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - ldw r1, (r3, 28) - addi r0, 4 - tstnbz r1 - bf .L_string_tail - - addi r0, 4 - addi r3, 32 - br .L_start_addr_aligned - -.L_string_tail: -# ifdef __CSKYBE__ - xtrb0 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb1 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb2 r3, r1 - bez r3, .L_return - addi r0, 1 -# else - xtrb3 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb2 r3, r1 - bez r3, .L_return - addi r0, 1 - xtrb1 r3, r1 - bez r3, .L_return - addi r0, 1 -# endif /* !__CSKYBE__ */ - -.L_return: - rts - -.L_start_not_aligned: - sub r2, r2, r1 -.L_start_not_aligned_loop: - ldb r1, (r3) - PRE_BNEZAD (r2) - addi r3, 1 - bez r1, .L_return - addi r0, 1 - BNEZAD (r2, .L_start_not_aligned_loop) - br .L_start_addr_aligned -ENDPROC(strlen) diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S deleted file mode 100644 index 3821ef9b75672d8a5af90839ffa7f95cfdb4da50..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/atomic.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include - -.text - -/* - * int csky_cmpxchg(int oldval, int newval, int *ptr) - * - * If *ptr != oldval && return 1, - * else *ptr = newval return 0. 
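Read as C, the comment above says: csky_cmpxchg is a compare-and-swap that returns 0 on success and 1 on mismatch. Cores with ldex/stex retry until the store-exclusive succeeds; on older cores the plain ldw/stw window between labels 1 and 2 is what csky_cmpxchg_fixup (below) rewinds from the TLB-modified handler. A hedged model of the semantics only, ignoring the trap-context epc/epsr/usp juggling:

int csky_cmpxchg_model(int oldval, int newval, int *ptr)
{
	if (*ptr != oldval)	/* cmpne a0, a3 */
		return 1;	/* no store; C flag reaches a0 via mvc */
	*ptr = newval;		/* stex (or plain stw) */
	return 0;
}

User space traps into this with oldval/newval/ptr in a0-a2; the sequence must appear atomic, which the kernel guarantees by restarting the window if a fault interrupts it.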
- */ -ENTRY(csky_cmpxchg) - USPTOKSP - mfcr a3, epc - addi a3, TRAP0_SIZE - - subi sp, 16 - stw a3, (sp, 0) - mfcr a3, epsr - stw a3, (sp, 4) - mfcr a3, usp - stw a3, (sp, 8) - - psrset ee -#ifdef CONFIG_CPU_HAS_LDSTEX -1: - ldex a3, (a2) - cmpne a0, a3 - bt16 2f - mov a3, a1 - stex a3, (a2) - bez a3, 1b -2: - sync.is -#else -1: - ldw a3, (a2) - cmpne a0, a3 - bt16 3f -2: - stw a1, (a2) -3: -#endif - mvc a0 - ldw a3, (sp, 0) - mtcr a3, epc - ldw a3, (sp, 4) - mtcr a3, epsr - ldw a3, (sp, 8) - mtcr a3, usp - addi sp, 16 - KSPTOUSP - rte -END(csky_cmpxchg) - -#ifndef CONFIG_CPU_HAS_LDSTEX -/* - * Called from tlbmodified exception - */ -ENTRY(csky_cmpxchg_fixup) - mfcr a0, epc - lrw a1, 2b - cmpne a1, a0 - bt 1f - subi a1, (2b - 1b) - stw a1, (sp, LSAVE_PC) -1: - rts -END(csky_cmpxchg_fixup) -#endif diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S deleted file mode 100644 index 4349528fbf38a59432911a22dd26c665409d11a2..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/entry.S +++ /dev/null @@ -1,345 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PTE_INDX_MSK 0xffc -#define PTE_INDX_SHIFT 10 -#define _PGDIR_SHIFT 22 - -.macro zero_fp -#ifdef CONFIG_STACKTRACE - movi r8, 0 -#endif -.endm - -.macro tlbop_begin name, val0, val1, val2 -ENTRY(csky_\name) - mtcr a3, ss2 - mtcr r6, ss3 - mtcr a2, ss4 - - RD_PGDR r6 - RD_MEH a3 -#ifdef CONFIG_CPU_HAS_TLBI - tlbi.vaas a3 - sync.is - - btsti a3, 31 - bf 1f - RD_PGDR_K r6 -1: -#else - bgeni a2, 31 - WR_MCIR a2 - bgeni a2, 25 - WR_MCIR a2 -#endif - bclri r6, 0 - lrw a2, va_pa_offset - ld.w a2, (a2, 0) - subu r6, a2 - bseti r6, 31 - - mov a2, a3 - lsri a2, _PGDIR_SHIFT - lsli a2, 2 - addu r6, a2 - ldw r6, (r6) - - lrw a2, va_pa_offset - ld.w a2, (a2, 0) - subu r6, a2 - bseti r6, 31 - - lsri a3, PTE_INDX_SHIFT - lrw a2, PTE_INDX_MSK - and a3, a2 - addu r6, a3 - ldw a3, (r6) - - movi a2, (_PAGE_PRESENT | \val0) - and a3, a2 - cmpne a3, a2 - bt \name - - /* First read/write the page, just update the flags */ - ldw a3, (r6) - bgeni a2, PAGE_VALID_BIT - bseti a2, PAGE_ACCESSED_BIT - bseti a2, \val1 - bseti a2, \val2 - or a3, a2 - stw a3, (r6) - - /* Some cpu tlb-hardrefill bypass the cache */ -#ifdef CONFIG_CPU_NEED_TLBSYNC - movi a2, 0x22 - bseti a2, 6 - mtcr r6, cr22 - mtcr a2, cr17 - sync -#endif - - mfcr a3, ss2 - mfcr r6, ss3 - mfcr a2, ss4 - rte -\name: - mfcr a3, ss2 - mfcr r6, ss3 - mfcr a2, ss4 - SAVE_ALL 0 -.endm -.macro tlbop_end is_write - zero_fp - RD_MEH a2 - psrset ee, ie - mov a0, sp - movi a1, \is_write - jbsr do_page_fault - jmpi ret_from_exception -.endm - -.text - -tlbop_begin tlbinvalidl, _PAGE_READ, PAGE_VALID_BIT, PAGE_ACCESSED_BIT -tlbop_end 0 - -tlbop_begin tlbinvalids, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT -tlbop_end 1 - -tlbop_begin tlbmodified, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT -#ifndef CONFIG_CPU_HAS_LDSTEX -jbsr csky_cmpxchg_fixup -#endif -tlbop_end 1 - -ENTRY(csky_systemcall) - SAVE_ALL TRAP0_SIZE - zero_fp - - psrset ee, ie - - lrw r11, __NR_syscalls - cmphs syscallid, r11 /* Check nr of syscall */ - bt ret_from_exception - - lrw r13, sys_call_table - ixw r13, syscallid - ldw r11, (r13) - cmpnei r11, 0 - bf ret_from_exception - - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - ldw r12, (r9, TINFO_FLAGS) - ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - 
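csky_systemcall's fast path, sketched in C: bounds-check the syscall number, skip NULL table slots, call through sys_call_table, and write the result into the saved a0 slot (LSAVE_A0); the ABIv2 branch additionally spills arguments 5 and 6 to the stack before the call. The table size and names below are assumptions for the sketch; on the failure paths the assembly simply falls through to ret_from_exception:

#define NR_SYSCALLS_SKETCH 436		/* stand-in for __NR_syscalls */

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* Returns 1 if dispatched (*ret filled in), 0 if the number was bad. */
int dispatch_syscall(syscall_fn *table, unsigned long nr,
		     const long a[6], long *ret)
{
	if (nr >= NR_SYSCALLS_SKETCH)	/* cmphs syscallid, r11 */
		return 0;
	if (!table[nr])			/* cmpnei r11, 0 */
		return 0;
	*ret = table[nr](a[0], a[1], a[2], a[3], a[4], a[5]);
	return 1;			/* stw a0, (sp, LSAVE_A0) */
}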
cmpnei r12, 0 - bt csky_syscall_trace -#if defined(__CSKYABIV2__) - subi sp, 8 - stw r5, (sp, 0x4) - stw r4, (sp, 0x0) - jsr r11 /* Do system call */ - addi sp, 8 -#else - jsr r11 -#endif - stw a0, (sp, LSAVE_A0) /* Save return value */ - jmpi ret_from_exception - -csky_syscall_trace: - mov a0, sp /* sp = pt_regs pointer */ - jbsr syscall_trace_enter - /* Prepare args before do system call */ - ldw a0, (sp, LSAVE_A0) - ldw a1, (sp, LSAVE_A1) - ldw a2, (sp, LSAVE_A2) - ldw a3, (sp, LSAVE_A3) -#if defined(__CSKYABIV2__) - subi sp, 8 - ldw r9, (sp, LSAVE_A4) - stw r9, (sp, 0x0) - ldw r9, (sp, LSAVE_A5) - stw r9, (sp, 0x4) -#else - ldw r6, (sp, LSAVE_A4) - ldw r7, (sp, LSAVE_A5) -#endif - jsr r11 /* Do system call */ -#if defined(__CSKYABIV2__) - addi sp, 8 -#endif - stw a0, (sp, LSAVE_A0) /* Save return value */ - - mov a0, sp /* right now, sp --> pt_regs */ - jbsr syscall_trace_exit - br ret_from_exception - -ENTRY(ret_from_kernel_thread) - jbsr schedule_tail - mov a0, r10 - jsr r9 - jbsr ret_from_exception - -ENTRY(ret_from_fork) - jbsr schedule_tail - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - ldw r12, (r9, TINFO_FLAGS) - ANDI_R3 r12, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) - cmpnei r12, 0 - bf ret_from_exception - mov a0, sp /* sp = pt_regs pointer */ - jbsr syscall_trace_exit - -ret_from_exception: - ld syscallid, (sp, LSAVE_PSR) - btsti syscallid, 31 - bt 1f - - /* - * Load address of current->thread_info, Then get address of task_struct - * Get task_needreshed in task_struct - */ - mov r9, sp - bmaski r10, THREAD_SHIFT - andn r9, r10 - - ldw r12, (r9, TINFO_FLAGS) - andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) - cmpnei r12, 0 - bt exit_work -1: - RESTORE_ALL - -exit_work: - lrw syscallid, ret_from_exception - mov lr, syscallid - - btsti r12, TIF_NEED_RESCHED - bt work_resched - - mov a0, sp - mov a1, r12 - jmpi do_notify_resume - -work_resched: - jmpi schedule - -ENTRY(csky_trap) - SAVE_ALL 0 - zero_fp - psrset ee - mov a0, sp /* Push Stack pointer arg */ - jbsr trap_c /* Call C-level trap handler */ - jmpi ret_from_exception - -/* - * Prototype from libc for abiv1: - * register unsigned int __result asm("a0"); - * asm( "trap 3" :"=r"(__result)::); - */ -ENTRY(csky_get_tls) - USPTOKSP - - /* increase epc for continue */ - mfcr a0, epc - addi a0, TRAP0_SIZE - mtcr a0, epc - - /* get current task thread_info with kernel 8K stack */ - bmaski a0, THREAD_SHIFT - not a0 - subi sp, 1 - and a0, sp - addi sp, 1 - - /* get tls */ - ldw a0, (a0, TINFO_TP_VALUE) - - KSPTOUSP - rte - -ENTRY(csky_irq) - SAVE_ALL 0 - zero_fp - psrset ee - -#ifdef CONFIG_PREEMPT - mov r9, sp /* Get current stack pointer */ - bmaski r10, THREAD_SHIFT - andn r9, r10 /* Get thread_info */ - - /* - * Get task_struct->stack.preempt_count for current, - * and increase 1. 
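In C, the CONFIG_PREEMPT bookkeeping csky_irq wraps around the handler: raise the preempt count so the interrupted context cannot be preempted mid-IRQ, then, once it drops back to zero, reschedule if the need-resched flag was set. The structure and flag value below are illustrative, not the kernel's real preempt API:

struct ti_sketch {
	int preempt_count;	/* TINFO_PREEMPT */
	unsigned long flags;	/* TINFO_FLAGS */
};

#define NEED_RESCHED_SK (1ul << 3)	/* placeholder for TIF_NEED_RESCHED */

void csky_irq_model(struct ti_sketch *ti, void (*do_irq)(void),
		    void (*preempt_schedule_irq)(void))
{
	ti->preempt_count++;		/* addi r12, 1; stw r12, ... */
	do_irq();			/* jbsr csky_do_IRQ */
	if (--ti->preempt_count == 0 &&
	    (ti->flags & NEED_RESCHED_SK))
		preempt_schedule_irq();	/* irq en/disable done inside */
}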
- */ - ldw r12, (r9, TINFO_PREEMPT) - addi r12, 1 - stw r12, (r9, TINFO_PREEMPT) -#endif - - mov a0, sp - jbsr csky_do_IRQ - -#ifdef CONFIG_PREEMPT - subi r12, 1 - stw r12, (r9, TINFO_PREEMPT) - cmpnei r12, 0 - bt 2f - ldw r12, (r9, TINFO_FLAGS) - btsti r12, TIF_NEED_RESCHED - bf 2f - jbsr preempt_schedule_irq /* irq en/disable is done inside */ -#endif -2: - jmpi ret_from_exception - -/* - * a0 = prev task_struct * - * a1 = next task_struct * - * a0 = return next - */ -ENTRY(__switch_to) - lrw a3, TASK_THREAD - addu a3, a0 - - mfcr a2, psr /* Save PSR value */ - stw a2, (a3, THREAD_SR) /* Save PSR in task struct */ - - SAVE_SWITCH_STACK - - stw sp, (a3, THREAD_KSP) - - /* Set up next process to run */ - lrw a3, TASK_THREAD - addu a3, a1 - - ldw sp, (a3, THREAD_KSP) /* Set next kernel sp */ - - ldw a2, (a3, THREAD_SR) /* Set next PSR */ - mtcr a2, psr - -#if defined(__CSKYABIV2__) - addi r7, a1, TASK_THREAD_INFO - ldw tls, (r7, TINFO_TP_VALUE) -#endif - - RESTORE_SWITCH_STACK - - rts -ENDPROC(__switch_to) diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S deleted file mode 100644 index 17ed9d2504807dfa385f5da0ef380b635a388c99..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/head.S +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include - -__HEAD -ENTRY(_start) - SETUP_MMU - - /* set stack point */ - lrw r6, init_thread_union + THREAD_SIZE - mov sp, r6 - - jmpi csky_start -END(_start) - -#ifdef CONFIG_SMP -.align 10 -ENTRY(_start_smp_secondary) - SETUP_MMU - - /* copy msa1 from CPU0 */ - lrw r6, secondary_msa1 - ld.w r6, (r6, 0) - mtcr r6, cr<31, 15> - - /* set stack point */ - lrw r6, secondary_stack - ld.w r6, (r6, 0) - mov sp, r6 - - jmpi csky_start_secondary -END(_start_smp_secondary) -#endif diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S deleted file mode 100644 index ae7961b973f26e8794a7597773fb9758b335a2bc..0000000000000000000000000000000000000000 --- a/arch/csky/kernel/vmlinux.lds.S +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include - -OUTPUT_ARCH(csky) -ENTRY(_start) - -#ifndef __cskyBE__ -jiffies = jiffies_64; -#else -jiffies = jiffies_64 + 4; -#endif - -#define VBR_BASE \ - . = ALIGN(1024); \ - vec_base = .; \ - . += 512; - -SECTIONS -{ - . = PAGE_OFFSET + PHYS_OFFSET_OFFSET; - - _stext = .; - __init_begin = .; - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(PAGE_SIZE) - PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; - IRQENTRY_TEXT - SOFTIRQENTRY_TEXT - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - } = 0 - _etext = .; - - /* __init_begin __init_end must be page aligned for free_initmem */ - . = ALIGN(PAGE_SIZE); - - - _sdata = .; - RO_DATA_SECTION(PAGE_SIZE) - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) - _edata = .; - - NOTES - EXCEPTION_TABLE(L1_CACHE_BYTES) - BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES) - VBR_BASE - _end = . 
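One detail worth unpacking from the top of this linker script: the 32-bit jiffies symbol is aliased onto the low word of jiffies_64, which sits at byte offset 0 on little-endian C-SKY but at offset 4 on big-endian, hence the two cases. A small self-contained host check of that offset, nothing kernel-specific:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t jiffies_64 = 0x44332211u;	/* value in the low word only */
	const unsigned char *p = (const unsigned char *)&jiffies_64;

	/* Low word at offset 0 => little endian (jiffies = jiffies_64);
	 * low word at offset 4 => big endian (jiffies = jiffies_64 + 4). */
	printf("low-word offset: %d\n", p[0] == 0x11 ? 0 : 4);
	return 0;
}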
; - - STABS_DEBUG - DWARF_DEBUG - - DISCARDS -} diff --git a/arch/h8300/boot/compressed/head.S b/arch/h8300/boot/compressed/head.S deleted file mode 100644 index 11ef509579cfa651ca3b087c38a97e0ada253fbb..0000000000000000000000000000000000000000 --- a/arch/h8300/boot/compressed/head.S +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/arch/h8300/boot/compressed/head.S - * - * Copyright (C) 2006 Yoshinori Sato - */ - -#include - - .section .text..startup,"ax" - .global startup -startup: - mov.l #startup, sp - mov.l er0, er4 - mov.l #__sbss, er0 - mov.l #__ebss, er1 - sub.l er0, er1 - shlr er1 - shlr er1 - sub.l er2, er2 -1: - mov.l er2, @er0 - adds #4, er0 - dec.l #1, er1 - bne 1b - jsr @decompress_kernel - mov.l er4, er0 - jmp @output - - .align 9 -fake_headers_as_bzImage: - .word 0 - .ascii "HdrS" ; header signature - .word 0x0202 ; header version number (>= 0x0105) - ; or else old loadlin-1.5 will fail) - .word 0 ; default_switch - .word 0 ; SETUPSEG - .word 0x1000 - .word 0 ; pointing to kernel version string - .byte 0 ; = 0, old one (LILO, Loadlin, - ; 0xTV: T=0 for LILO - ; V = version - .byte 1 ; Load flags bzImage=1 - .word 0x8000 ; size to move, when setup is not - .long 0x100000 ; 0x100000 = default for big kernel - .long 0 ; address of loaded ramdisk image - .long 0 ; its size in bytes - - .end diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S deleted file mode 100644 index 4ade5f8299baed742c24b839d5bb3bec867b3d78..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/entry.S +++ /dev/null @@ -1,434 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * linux/arch/h8300/kernel/entry.S - * - * Yoshinori Sato - * David McCullough - * - */ - -/* - * entry.S - * include exception/interrupt gateway - * system call entry - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_H8300H) -#define USERRET 8 -INTERRUPTS = 64 - .h8300h - .macro SHLL2 reg - shll.l \reg - shll.l \reg - .endm - .macro SHLR2 reg - shlr.l \reg - shlr.l \reg - .endm - .macro SAVEREGS - mov.l er0,@-sp - mov.l er1,@-sp - mov.l er2,@-sp - mov.l er3,@-sp - .endm - .macro RESTOREREGS - mov.l @sp+,er3 - mov.l @sp+,er2 - .endm - .macro SAVEEXR - .endm - .macro RESTOREEXR - .endm -#endif -#if defined(CONFIG_CPU_H8S) -#define USERRET 10 -#define USEREXR 8 -INTERRUPTS = 128 - .h8300s - .macro SHLL2 reg - shll.l #2,\reg - .endm - .macro SHLR2 reg - shlr.l #2,\reg - .endm - .macro SAVEREGS - stm.l er0-er3,@-sp - .endm - .macro RESTOREREGS - ldm.l @sp+,er2-er3 - .endm - .macro SAVEEXR - mov.w @(USEREXR:16,er0),r1 - mov.w r1,@(LEXR-LER3:16,sp) /* copy EXR */ - .endm - .macro RESTOREEXR - mov.w @(LEXR-LER1:16,sp),r1 /* restore EXR */ - mov.b r1l,r1h - mov.w r1,@(USEREXR:16,er0) - .endm -#endif - - -/* CPU context save/restore macros. */ - - .macro SAVE_ALL - mov.l er0,@-sp - stc ccr,r0l /* check kernel mode */ - btst #4,r0l - bne 5f - - /* user mode */ - mov.l sp,@_sw_usp - mov.l @sp,er0 /* restore saved er0 */ - orc #0x10,ccr /* switch kernel stack */ - mov.l @_sw_ksp,sp - sub.l #(LRET-LORIG),sp /* allocate LORIG - LRET */ - SAVEREGS - mov.l @_sw_usp,er0 - mov.l @(USERRET:16,er0),er1 /* copy the RET addr */ - mov.l er1,@(LRET-LER3:16,sp) - SAVEEXR - - mov.l @(LORIG-LER3:16,sp),er0 - mov.l er0,@(LER0-LER3:16,sp) /* copy ER0 */ - mov.w e1,r1 /* e1 highbyte = ccr */ - and #0xef,r1h /* mask mode? 
flag */ - bra 6f -5: - /* kernel mode */ - mov.l @sp,er0 /* restore saved er0 */ - subs #2,sp /* set dummy ccr */ - subs #4,sp /* set dummp sp */ - SAVEREGS - mov.w @(LRET-LER3:16,sp),r1 /* copy old ccr */ -6: - mov.b r1h,r1l - mov.b #0,r1h - mov.w r1,@(LCCR-LER3:16,sp) /* set ccr */ - mov.l @_sw_usp,er2 - mov.l er2,@(LSP-LER3:16,sp) /* set usp */ - mov.l er6,@-sp /* syscall arg #6 */ - mov.l er5,@-sp /* syscall arg #5 */ - mov.l er4,@-sp /* syscall arg #4 */ - .endm /* r1 = ccr */ - - .macro RESTORE_ALL - mov.l @sp+,er4 - mov.l @sp+,er5 - mov.l @sp+,er6 - RESTOREREGS - mov.w @(LCCR-LER1:16,sp),r0 /* check kernel mode */ - btst #4,r0l - bne 7f - - orc #0xc0,ccr - mov.l @(LSP-LER1:16,sp),er0 - mov.l @(LER0-LER1:16,sp),er1 /* restore ER0 */ - mov.l er1,@er0 - RESTOREEXR - mov.w @(LCCR-LER1:16,sp),r1 /* restore the RET addr */ - mov.b r1l,r1h - mov.b @(LRET+1-LER1:16,sp),r1l - mov.w r1,e1 - mov.w @(LRET+2-LER1:16,sp),r1 - mov.l er1,@(USERRET:16,er0) - - mov.l @sp+,er1 - add.l #(LRET-LER1),sp /* remove LORIG - LRET */ - mov.l sp,@_sw_ksp - andc #0xef,ccr /* switch to user mode */ - mov.l er0,sp - bra 8f -7: - mov.l @sp+,er1 - add.l #10,sp -8: - mov.l @sp+,er0 - adds #4,sp /* remove the sw created LVEC */ - rte - .endm - -.globl _system_call -.globl ret_from_exception -.globl ret_from_fork -.globl ret_from_kernel_thread -.globl ret_from_interrupt -.globl _interrupt_redirect_table -.globl _sw_ksp,_sw_usp -.globl _resume -.globl _interrupt_entry -.globl _trace_break -.globl _nmi - -#if defined(CONFIG_ROMKERNEL) - .section .int_redirect,"ax" -_interrupt_redirect_table: -#if defined(CONFIG_CPU_H8300H) - .rept 7 - .long 0 - .endr -#endif -#if defined(CONFIG_CPU_H8S) - .rept 5 - .long 0 - .endr - jmp @_trace_break - .long 0 -#endif - - jsr @_interrupt_entry /* NMI */ - jmp @_system_call /* TRAPA #0 (System call) */ - .long 0 -#if defined(CONFIG_KGDB) - jmp @_kgdb_trap -#else - .long 0 -#endif - jmp @_trace_break /* TRAPA #3 (breakpoint) */ - .rept INTERRUPTS-12 - jsr @_interrupt_entry - .endr -#endif -#if defined(CONFIG_RAMKERNEL) -.globl _interrupt_redirect_table - .section .bss -_interrupt_redirect_table: - .space 4 -#endif - - .section .text - .align 2 -_interrupt_entry: - SAVE_ALL -/* r1l is saved ccr */ - mov.l sp,er0 - add.l #LVEC,er0 - btst #4,r1l - bne 1f - /* user LVEC */ - mov.l @_sw_usp,er0 - adds #4,er0 -1: - mov.l @er0,er0 /* LVEC address */ -#if defined(CONFIG_ROMKERNEL) - sub.l #_interrupt_redirect_table,er0 -#endif -#if defined(CONFIG_RAMKERNEL) - mov.l @_interrupt_redirect_table,er1 - sub.l er1,er0 -#endif - SHLR2 er0 - dec.l #1,er0 - mov.l sp,er1 - subs #4,er1 /* adjust ret_pc */ -#if defined(CONFIG_CPU_H8S) - orc #7,exr -#endif - jsr @do_IRQ - jmp @ret_from_interrupt - -_system_call: - subs #4,sp /* dummy LVEC */ - SAVE_ALL - /* er0: syscall nr */ - andc #0xbf,ccr - mov.l er0,er4 - - /* save top of frame */ - mov.l sp,er0 - jsr @set_esp0 - andc #0x3f,ccr - mov.l sp,er2 - and.w #0xe000,r2 - mov.l @(TI_FLAGS:16,er2),er2 - and.w #_TIF_WORK_SYSCALL_MASK,r2 - beq 1f - mov.l sp,er0 - jsr @do_syscall_trace_enter -1: - cmp.l #__NR_syscalls,er4 - bcc badsys - SHLL2 er4 - mov.l #_sys_call_table,er0 - add.l er4,er0 - mov.l @er0,er4 - beq ret_from_exception:16 - mov.l @(LER1:16,sp),er0 - mov.l @(LER2:16,sp),er1 - mov.l @(LER3:16,sp),er2 - jsr @er4 - mov.l er0,@(LER0:16,sp) /* save the return value */ - mov.l sp,er2 - and.w #0xe000,r2 - mov.l @(TI_FLAGS:16,er2),er2 - and.w #_TIF_WORK_SYSCALL_MASK,r2 - beq 2f - mov.l sp,er0 - jsr @do_syscall_trace_leave -2: - orc #0xc0,ccr - bra resume_userspace 
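resume_userspace, work_pending and work_resched below form a loop that runs until no work bit is left: reschedule first if need-resched is set, otherwise deliver signal/notify work, re-checking the flags each time around. The same control flow in C (flag values and names are placeholders for the sketch):

#define SIGPENDING_SK	 (1u << 1)	/* placeholder TIF_* values */
#define NEED_RESCHED_SK	 (1u << 2)
#define NOTIFY_RESUME_SK (1u << 3)
#define WORK_MASK_SK (SIGPENDING_SK | NEED_RESCHED_SK | NOTIFY_RESUME_SK)

void resume_userspace_model(const unsigned long *tif_flags,
			    void (*schedule)(void),
			    void (*do_notify_resume)(void))
{
	for (;;) {
		unsigned long work = *tif_flags & WORK_MASK_SK;
		if (!work)
			return;			/* restore_all: RESTORE_ALL / rte */
		if (work & NEED_RESCHED_SK)
			schedule();		/* work_resched */
		else
			do_notify_resume();	/* work notifysig */
	}
}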
- -badsys: - mov.l #-ENOSYS,er0 - mov.l er0,@(LER0:16,sp) - bra resume_userspace - -#if !defined(CONFIG_PREEMPT) -#define resume_kernel restore_all -#endif - -ret_from_exception: -#if defined(CONFIG_PREEMPT) - orc #0xc0,ccr -#endif -ret_from_interrupt: - mov.b @(LCCR+1:16,sp),r0l - btst #4,r0l - bne resume_kernel:16 /* return from kernel */ -resume_userspace: - andc #0xbf,ccr - mov.l sp,er4 - and.w #0xe000,r4 /* er4 <- current thread info */ - mov.l @(TI_FLAGS:16,er4),er1 - and.l #_TIF_WORK_MASK,er1 - beq restore_all:8 -work_pending: - btst #TIF_NEED_RESCHED,r1l - bne work_resched:8 - /* work notifysig */ - mov.l sp,er0 - subs #4,er0 /* er0: pt_regs */ - jsr @do_notify_resume - bra resume_userspace:8 -work_resched: - mov.l sp,er0 - jsr @set_esp0 - jsr @schedule - bra resume_userspace:8 -restore_all: - RESTORE_ALL /* Does RTE */ - -#if defined(CONFIG_PREEMPT) -resume_kernel: - mov.l @(TI_PRE_COUNT:16,er4),er0 - bne restore_all:8 -need_resched: - mov.l @(TI_FLAGS:16,er4),er0 - btst #TIF_NEED_RESCHED,r0l - beq restore_all:8 - mov.b @(LCCR+1:16,sp),r0l /* Interrupt Enabled? */ - bmi restore_all:8 - mov.l sp,er0 - jsr @set_esp0 - jsr @preempt_schedule_irq - bra need_resched:8 -#endif - -ret_from_fork: - mov.l er2,er0 - jsr @schedule_tail - jmp @ret_from_exception - -ret_from_kernel_thread: - mov.l er2,er0 - jsr @schedule_tail - mov.l @(LER4:16,sp),er0 - mov.l @(LER5:16,sp),er1 - jsr @er1 - jmp @ret_from_exception - -_resume: - /* - * Beware - when entering resume, offset of tss is in d1, - * prev (the current task) is in a0, next (the new task) - * is in a1 and d2.b is non-zero if the mm structure is - * shared between the tasks, so don't change these - * registers until their contents are no longer needed. - */ - - /* save sr */ - sub.w r3,r3 - stc ccr,r3l - mov.w r3,@(THREAD_CCR+2:16,er0) - - /* disable interrupts */ - orc #0xc0,ccr - mov.l @_sw_usp,er3 - mov.l er3,@(THREAD_USP:16,er0) - mov.l sp,@(THREAD_KSP:16,er0) - - /* Skip address space switching if they are the same. */ - /* FIXME: what did we hack out of here, this does nothing! 
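_resume, which this FIXME sits in, is the h8300 context switch: it saves the outgoing task's CCR, user SP and kernel SP into its thread struct, then installs the incoming task's saved values, with interrupts held off while both stacks are in flux. An outline in C with illustrative field names:

struct h8300_thread_sk {
	unsigned short ccr;	/* THREAD_CCR */
	unsigned long usp;	/* THREAD_USP, mirrored in _sw_usp */
	unsigned long ksp;	/* THREAD_KSP */
};

void resume_model(struct h8300_thread_sk *prev,
		  const struct h8300_thread_sk *next,
		  unsigned long *sw_usp, unsigned long *kernel_sp,
		  unsigned short cur_ccr)
{
	prev->ccr = cur_ccr;	/* stc ccr,r3l */
	/* orc #0xc0,ccr: interrupts off for the stack swap */
	prev->usp = *sw_usp;	/* mov.l @_sw_usp,er3 */
	prev->ksp = *kernel_sp;	/* mov.l sp,@(THREAD_KSP:16,er0) */
	*sw_usp = next->usp;
	*kernel_sp = next->ksp;
	/* ldc next->ccr then restores the incoming task's interrupt state */
}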
*/ - - mov.l @(THREAD_USP:16,er1),er0 - mov.l er0,@_sw_usp - mov.l @(THREAD_KSP:16,er1),sp - - /* restore status register */ - mov.w @(THREAD_CCR+2:16,er1),r3 - - ldc r3l,ccr - rts - -_trace_break: - subs #4,sp - SAVE_ALL - sub.l er1,er1 - dec.l #1,er1 - mov.l er1,@(LORIG,sp) - mov.l sp,er0 - jsr @set_esp0 - mov.l @_sw_usp,er0 - mov.l @er0,er1 - mov.w @(-2:16,er1),r2 - cmp.w #0x5730,r2 - beq 1f - subs #2,er1 - mov.l er1,@er0 -1: - and.w #0xff,e1 - mov.l er1,er0 - jsr @trace_trap - jmp @ret_from_exception - -_nmi: - subs #4, sp - mov.l er0, @-sp - mov.l @_interrupt_redirect_table, er0 - add.l #8*4, er0 - mov.l er0, @(4,sp) - mov.l @sp+, er0 - jmp @_interrupt_entry - -#if defined(CONFIG_KGDB) -_kgdb_trap: - subs #4,sp - SAVE_ALL - mov.l sp,er0 - add.l #LRET,er0 - mov.l er0,@(LSP,sp) - jsr @set_esp0 - mov.l sp,er0 - subs #4,er0 - jsr @h8300_kgdb_trap - jmp @ret_from_exception -#endif - - .section .bss -_sw_ksp: - .space 4 -_sw_usp: - .space 4 - - .end diff --git a/arch/h8300/kernel/head_ram.S b/arch/h8300/kernel/head_ram.S deleted file mode 100644 index dbf8429f5fab5f91071242a059f159fbe92c0c62..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/head_ram.S +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#define SYSCR 0xfee012 -#define IRAMTOP 0xffff20 -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#define INTCR 0xffff31 -#define IRAMTOP 0xffc000 -#endif - - __HEAD - .global _start -_start: - mov.l #IRAMTOP,sp - /* .bss clear */ - mov.l #_sbss,er5 - mov.l #_ebss,er4 - sub.l er5,er4 - shlr er4 - shlr er4 - sub.l er2,er2 -1: - mov.l er2,@er5 - adds #4,er5 - dec.l #1,er4 - bne 1b - jsr @h8300_fdt_init - - /* linux kernel start */ -#if defined(CONFIG_CPU_H8300H) - ldc #0xd0,ccr /* running kernel */ - mov.l #SYSCR,er0 - bclr #3,@er0 -#endif -#if defined(CONFIG_CPU_H8S) - ldc #0x07,exr - bclr #4,@INTCR:8 - bset #5,@INTCR:8 /* Interrupt mode 2 */ - ldc #0x90,ccr /* running kernel */ -#endif - mov.l #init_thread_union,sp - add.l #0x2000,sp - jsr @start_kernel - -1: - bra 1b - - .end diff --git a/arch/h8300/kernel/head_rom.S b/arch/h8300/kernel/head_rom.S deleted file mode 100644 index ab55a9cb2f367cf2cdbe9c2dd18c845b70ca5a82..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/head_rom.S +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#define SYSCR 0xfee012 -#define IRAMTOP 0xffff20 -#define NR_INT 64 -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#define INTCR 0xffff31 -#define IRAMTOP 0xffc000 -#define NR_INT 128 -#endif - - __HEAD - .global _start -_start: - mov.l #IRAMTOP,sp -#if !defined(CONFIG_H8300H_SIM) && \ - !defined(CONFIG_H8S_SIM) - jsr @lowlevel_init - - /* copy .data */ - mov.l #_begin_data,er5 - mov.l #_sdata,er6 - mov.l #_edata,er4 - sub.l er6,er4 - shlr.l er4 - shlr.l er4 -1: - mov.l @er5+,er0 - mov.l er0,@er6 - adds #4,er6 - dec.l #1,er4 - bne 1b - /* .bss clear */ - mov.l #_sbss,er5 - mov.l #_ebss,er4 - sub.l er5,er4 - shlr er4 - shlr er4 - sub.l er0,er0 -1: - mov.l er0,@er5 - adds #4,er5 - dec.l #1,er4 - bne 1b -#else - /* get cmdline from gdb */ - jsr @0xcc - ;; er0 - argc - ;; er1 - argv - mov.l #command_line,er3 - adds #4,er1 - dec.l #1,er0 - beq 4f -1: - mov.l @er1+,er2 -2: - mov.b @er2+,r4l - beq 3f - mov.b r4l,@er3 - adds #1,er3 - bra 2b -3: - mov.b #' ',r4l - mov.b r4l,@er3 - adds #1,er3 - dec.l #1,er0 
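The startup loops in head_ram.S and head_rom.S share one idiom: a byte span is turned into a longword count with two single-bit right shifts (so the section bounds are assumed 4-byte aligned), then walked with a dec/bne pair. The .bss clear, for instance, is just this in C (names are illustrative):

#include <stdint.h>

void clear_bss_model(uint32_t *sbss, const uint32_t *ebss)
{
	/* The pointer difference already counts words here; the assembly
	 * gets the same count with sub.l er5,er4 and two shlr. */
	uint32_t count = (uint32_t)(ebss - sbss);

	while (count--)		/* mov.l er2,@er5; adds #4,er5; dec/bne */
		*sbss++ = 0;
}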
- bne 1b - subs #1,er3 - mov.b #0,r4l - mov.b r4l,@er3 -4: -#endif - sub.l er0,er0 - jsr @h8300_fdt_init - /* linux kernel start */ -#if defined(CONFIG_CPU_H8300H) - ldc #0xd0,ccr /* running kernel */ - mov.l #SYSCR,er0 - bclr #3,@er0 -#endif -#if defined(CONFIG_CPU_H8S) - ldc #0x07,exr - bclr #4,@INTCR:8 - bset #5,@INTCR:8 /* Interrupt mode 2 */ - ldc #0x90,ccr /* running kernel */ -#endif - mov.l #init_thread_union,sp - add.l #0x2000,sp - jsr @start_kernel - -1: - bra 1b - -#if defined(CONFIG_ROMKERNEL) - /* interrupt vector */ - .section .vectors,"ax" - .long _start - .long _start -vector = 2 - .rept NR_INT - 2 - .long _interrupt_redirect_table+vector*4 -vector = vector + 1 - .endr -#endif - .end diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S deleted file mode 100644 index 49f716c0a1df977d5981c6b80dc4e126c282cb83..0000000000000000000000000000000000000000 --- a/arch/h8300/kernel/vmlinux.lds.S +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include - -#define ROMTOP 0x000000 -#define RAMTOP 0x400000 - -jiffies = jiffies_64 + 4; - -ENTRY(_start) - -SECTIONS -{ -#if defined(CONFIG_ROMKERNEL) - . = ROMTOP; - .vectors : - { - _vector = . ; - *(.vector*) - } -#else - . = RAMTOP; - _ramstart = .; - . = . + CONFIG_OFFSET; -#endif - _text = .; - HEAD_TEXT_SECTION - .text : { - _stext = . ; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT -#if defined(CONFIG_ROMKERNEL) - *(.int_redirect) -#endif - _etext = . ; - } - EXCEPTION_TABLE(16) - NOTES - RO_DATA_SECTION(4) - ROMEND = .; -#if defined(CONFIG_ROMKERNEL) - . = RAMTOP; - _ramstart = .; -#define ADDR(x) ROMEND -#endif - _sdata = . ; - __data_start = . ; - RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE) -#if defined(CONFIG_ROMKERNEL) -#undef ADDR -#endif - . = ALIGN(0x4) ; - __init_begin = .; - INIT_TEXT_SECTION(4) - INIT_DATA_SECTION(4) - __init_end = .; - _edata = . 
; - _begin_data = LOADADDR(.data); - _sbss =.; - BSS_SECTION(0, 0 ,0) - _ebss =.; - _ramend = .; - _end = .; - DISCARDS -} diff --git a/arch/h8300/lib/abs.S b/arch/h8300/lib/abs.S deleted file mode 100644 index 6e1a4ed3af53369871ac530a282953bea89c20d0..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/abs.S +++ /dev/null @@ -1,21 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; abs.S - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text -.global _abs - -;;; int abs(int n) -_abs: - mov.l er0,er0 - bpl 1f - neg.l er0 -1: - rts diff --git a/arch/h8300/lib/memcpy.S b/arch/h8300/lib/memcpy.S deleted file mode 100644 index f1cd67d5e3ea68e165402673f01bab686bc32b81..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/memcpy.S +++ /dev/null @@ -1,86 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; memcpy.S - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text -.global memcpy - -;;; void *memcpy(void *to, void *from, size_t n) -memcpy: - mov.l er2,er2 - bne 1f - rts -1: - ;; address check - bld #0,r0l - bxor #0,r1l - bcs 4f - mov.l er4,@-sp - mov.l er0,@-sp - btst #0,r0l - beq 1f - ;; (aligned even) odd address - mov.b @er1,r3l - mov.b r3l,@er0 - adds #1,er1 - adds #1,er0 - dec.l #1,er2 - beq 3f -1: - ;; n < sizeof(unsigned long) check - sub.l er4,er4 - adds #4,er4 ; loop count check value - cmp.l er4,er2 - blo 2f - ;; unsigned long copy -1: - mov.l @er1,er3 - mov.l er3,@er0 - adds #4,er0 - adds #4,er1 - subs #4,er2 - cmp.l er4,er2 - bcc 1b - ;; rest -2: - mov.l er2,er2 - beq 3f -1: - mov.b @er1,r3l - mov.b r3l,@er0 - adds #1,er1 - adds #1,er0 - dec.l #1,er2 - bne 1b -3: - mov.l @sp+,er0 - mov.l @sp+,er4 - rts - - ;; odd <- even / even <- odd -4: - mov.l er4,er3 - mov.l er2,er4 - mov.l er5,er2 - mov.l er1,er5 - mov.l er6,er1 - mov.l er0,er6 -1: - eepmov.w - mov.w r4,r4 - bne 1b - dec.w #1,e4 - bpl 1b - mov.l er1,er6 - mov.l er2,er5 - mov.l er3,er4 - rts - - .end diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S deleted file mode 100644 index 2d1abc37fd08b0a78856190fdb00e41f4a264fca..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/memset.S +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* memset.S */ - -#include - -#if defined(CONFIG_CPU_H8300H) - .h8300h -#endif -#if defined(CONFIG_CPU_H8S) - .h8300s -#endif - .text - -.global memset -.global clear_user - -;;void *memset(*ptr, int c, size_t count) -;; ptr = er0 -;; c = er1(r1l) -;; count = er2 -memset: - btst #0,r0l - beq 2f - - ;; odd address -1: - mov.b r1l,@er0 - adds #1,er0 - dec.l #1,er2 - beq 6f - - ;; even address -2: - mov.l er2,er3 - cmp.l #4,er2 - blo 4f - ;; count>=4 -> count/4 -#if defined(CONFIG_CPU_H8300H) - shlr.l er2 - shlr.l er2 -#endif -#if defined(CONFIG_CPU_H8S) - shlr.l #2,er2 -#endif - ;; byte -> long - mov.b r1l,r1h - mov.w r1,e1 -3: - mov.l er1,@er0 - adds #4,er0 - dec.l #1,er2 - bne 3b -4: - ;; count % 4 - and.b #3,r3l - beq 6f -5: - mov.b r1l,@er0 - adds #1,er0 - dec.b r3l - bne 5b -6: - rts - -clear_user: - mov.l er1, er2 - sub.l er1, er1 - bra memset - - .end diff --git a/arch/h8300/lib/moddivsi3.S b/arch/h8300/lib/moddivsi3.S deleted file mode 100644 index 9e33ab0456c7501d89e9fedba025b50e1ed584ea..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/moddivsi3.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - -; numerator in A0/A1 -; denominator in A2/A3 - .global 
__modsi3 -__modsi3: - PUSHP S2P - bsr modnorm - bsr __divsi3 - mov.l er3,er0 - bra exitdiv - - .global __umodsi3 -__umodsi3: - bsr __udivsi3:16 - mov.l er3,er0 - rts - - .global __divsi3 -__divsi3: - PUSHP S2P - bsr divnorm - bsr __udivsi3:16 - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve - neg.l A0P - -reti: - POPP S2P - rts - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. -modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - - .end diff --git a/arch/h8300/lib/modsi3.S b/arch/h8300/lib/modsi3.S deleted file mode 100644 index bdc8a002921df129a5ff9a584b87a96035cb483d..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/modsi3.S +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - -; numerator in A0/A1 -; denominator in A2/A3 - .global __modsi3 -__modsi3: - PUSHP S2P - bsr modnorm - bsr __divsi3 - mov.l er3,er0 - bra exitdiv - - .global __umodsi3 -__umodsi3: - bsr __udivsi3 - mov.l er3,er0 - rts - - .global __divsi3 -__divsi3: - PUSHP S2P - jsr divnorm - bsr __udivsi3 - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve - neg.l A0P - -reti: - POPP S2P - rts - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
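divnorm and modnorm reduce signed division to the unsigned __udivsi3 and fix the sign afterwards: the quotient is negated when exactly one operand was negative (divnorm's xor.b into S2L), while the remainder follows the numerator's sign only, since modnorm never toggles for the divisor; that is exactly C's % rule, whatever the comment above suggests. As C, with udivsi3_model as a trivial stand-in for __udivsi3:

static unsigned udivsi3_model(unsigned n, unsigned d)
{
	return n / d;		/* stands in for __udivsi3 */
}

int divsi3_model(int a, int b)
{
	int neg = (a < 0) ^ (b < 0);			/* xor.b #0x08,S2L */
	unsigned ua = a < 0 ? 0u - (unsigned)a : (unsigned)a;
	unsigned ub = b < 0 ? 0u - (unsigned)b : (unsigned)b;
	unsigned q = udivsi3_model(ua, ub);
	return neg ? -(int)q : (int)q;			/* exitdiv: neg.l A0P */
}

int modsi3_model(int a, int b)
{
	unsigned ua = a < 0 ? 0u - (unsigned)a : (unsigned)a;
	unsigned ub = b < 0 ? 0u - (unsigned)b : (unsigned)b;
	unsigned r = ua - udivsi3_model(ua, ub) * ub;	/* er3 after the bsr */
	return a < 0 ? -(int)r : (int)r;	/* numerator's sign only */
}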
-modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - - .end diff --git a/arch/h8300/lib/mulsi3.S b/arch/h8300/lib/mulsi3.S deleted file mode 100644 index 5a062fd298d128b0d7f505655e908448b6557388..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/mulsi3.S +++ /dev/null @@ -1,39 +0,0 @@ -; SPDX-License-Identifier: GPL-2.0 -; -; mulsi3 for H8/300H - based on Renesas SH implementation -; -; by Toshiyasu Morita -; -; Old code: -; -; 16b * 16b = 372 states (worst case) -; 32b * 32b = 724 states (worst case) -; -; New code: -; -; 16b * 16b = 48 states -; 16b * 32b = 72 states -; 32b * 32b = 92 states -; - - .global __mulsi3 -__mulsi3: - mov.w r1,r2 ; ( 2 states) b * d - mulxu r0,er2 ; (22 states) - - mov.w e0,r3 ; ( 2 states) a * d - beq L_skip1 ; ( 4 states) - mulxu r1,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip1: - mov.w e1,r3 ; ( 2 states) c * b - beq L_skip2 ; ( 4 states) - mulxu r0,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip2: - mov.l er2,er0 ; ( 2 states) - rts ; (10 states) - - .end diff --git a/arch/h8300/lib/strncpy.S b/arch/h8300/lib/strncpy.S deleted file mode 100644 index 8b65d7c4727b280ee30b406a1091642683369f04..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/strncpy.S +++ /dev/null @@ -1,35 +0,0 @@ -;;; SPDX-License-Identifier: GPL-2.0 -;;; strncpy.S - -#include - - .text -.global strncpy_from_user - -;;; long strncpy_from_user(void *to, void *from, size_t n) -strncpy_from_user: - mov.l er2,er2 - bne 1f - sub.l er0,er0 - rts -1: - mov.l er4,@-sp - sub.l er3,er3 -2: - mov.b @er1+,r4l - mov.b r4l,@er0 - adds #1,er0 - beq 3f - inc.l #1,er3 - dec.l #1,er2 - bne 2b -3: - dec.l #1,er2 -4: - mov.b r4l,@er0 - adds #1,er0 - dec.l #1,er2 - bne 4b - mov.l er3,er0 - mov.l @sp+,er4 - rts diff --git a/arch/h8300/lib/udivsi3.S b/arch/h8300/lib/udivsi3.S deleted file mode 100644 index b810aba8e1009603c5cf07b88ad3c5ebd3333f4b..0000000000000000000000000000000000000000 --- a/arch/h8300/lib/udivsi3.S +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include "libgcc.h" - - ;; This function also computes the remainder and stores it in er3. - .global __udivsi3 -__udivsi3: - mov.w A1E,A1E ; denominator top word 0? 
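__mulsi3 above builds a 32x32->32 multiply from the 16x16->32 mulxu instruction. Writing a = (a_hi<<16)+a_lo and b = (b_hi<<16)+b_lo, only three partial products can touch the low 32 bits: a_hi*b_hi lands entirely above bit 31 and is never computed, and each cross term is skipped when its high half is zero (the beq L_skip paths behind the state counts). In C:

#include <stdint.h>

uint32_t mulsi3_model(uint32_t a, uint32_t b)
{
	uint32_t a_lo = a & 0xffff, a_hi = a >> 16;
	uint32_t b_lo = b & 0xffff, b_hi = b >> 16;

	uint32_t r = a_lo * b_lo;		/* mulxu r0,er2 */
	if (a_hi)				/* beq L_skip1 */
		r += (a_hi * b_lo) << 16;	/* add.w r3,e2: low 16 survive */
	if (b_hi)				/* beq L_skip2 */
		r += (b_hi * a_lo) << 16;
	return r;
}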
- bne DenHighNonZero - - ; do it the easy way, see page 107 in manual - mov.w A0E,A2 - extu.l A2P - divxu.w A1,A2P - mov.w A2E,A0E - divxu.w A1,A0P - mov.w A0E,A3 - mov.w A2,A0E - extu.l A3P - rts - - ; er0 = er0 / er1 - ; er3 = er0 % er1 - ; trashes er1 er2 - ; expects er1 >= 2^16 -DenHighNonZero: - mov.l er0,er3 - mov.l er1,er2 -#ifdef CONFIG_CPU_H8300H -divmod_L21: - shlr.l er0 - shlr.l er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -#else - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - beq divmod_L22A -divmod_L21: - shlr.l #2,er0 -divmod_L22: - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -divmod_L22A: - rotxl.w r2 - bcs divmod_L23 - shlr.l er0 - bra divmod_L24 -divmod_L23: - rotxr.w r2 - shlr.l #2,er0 -divmod_L24: -#endif - ;; At this point, - ;; er0 contains shifted dividend - ;; er1 contains divisor - ;; er2 contains shifted divisor - ;; er3 contains dividend, later remainder - divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) - extu.l er0 - beq divmod_L25 - subs #1,er0 ; er0 = AQ - 1 - mov.w e1,r2 - mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor - sub.w r2,e3 ; dividend - 65536 * er2 - mov.w r1,r2 - mulxu.w r0,er2 ; compute er3 = remainder (tentative) - sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor -divmod_L25: - cmp.l er1,er3 ; is divisor < remainder? - blo divmod_L26 - adds #1,er0 - sub.l er1,er3 ; correct the remainder -divmod_L26: - rts - - .end diff --git a/arch/hexagon/kernel/head.S b/arch/hexagon/kernel/head.S deleted file mode 100644 index 0b016308cc79f041579cfcf8929afe1e5b2b6b5f..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/head.S +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Early kernel startup code for Hexagon - * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define SEGTABLE_ENTRIES #0x0e0 - - __INIT -ENTRY(stext) - /* - * VMM will already have set up true vector page, MMU, etc. - * To set up initial kernel identity map, we have to pass - * the VMM a pointer to some canonical page tables. In - * this implementation, we're assuming that we've got - * them precompiled. Generate value in R24, as we'll need - * it again shortly. - */ - r24.L = #LO(swapper_pg_dir) - r24.H = #HI(swapper_pg_dir) - - /* - * Symbol is kernel segment address, but we need - * the logical/physical address. - */ - r25 = pc; - r2.h = #0xffc0; - r2.l = #0x0000; - r25 = and(r2,r25); /* R25 holds PHYS_OFFSET now */ - r1.h = #HI(PAGE_OFFSET); - r1.l = #LO(PAGE_OFFSET); - r24 = sub(r24,r1); /* swapper_pg_dir - PAGE_OFFSET */ - r24 = add(r24,r25); /* + PHYS_OFFSET */ - - r0 = r24; /* aka __pa(swapper_pg_dir) */ - - /* - * Initialize page dir to make the virtual and physical - * addresses where the kernel was loaded be identical. - * Done in 4MB chunks. 
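The arithmetic in this block, as C: count how many 4MB huge-page entries cover [stext, _end) (add 4M-1, shift right by 22), round the physical base down to a 4MB boundary, and write one PTE per region so that VA == PA while the MMU is brought up. The PTE bits and names below are placeholders, not Hexagon's real encodings:

#include <stdint.h>

#define SZ_4M	(1u << 22)

void map_identity_model(uint32_t *pgdir, uint32_t stext_pa,
			uint32_t end_pa, uint32_t pte_bits)
{
	/* r26 = (_end - stext + (4M - 1)) >> 22 : entry count, rounded up */
	uint32_t n = (end_pa - stext_pa + SZ_4M - 1) >> 22;
	uint32_t pa = stext_pa & ~(SZ_4M - 1);	/* 4MB-aligned base */

	for (uint32_t i = 0; i < n; i++, pa += SZ_4M)
		pgdir[pa >> 22] = pa | pte_bits;	/* memw(r0++ #4) = r1 */
}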
- */ -#define PTE_BITS ( __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_WB_L2 << 6 \ - | __HVM_PDE_S_4MB) - - /* - * Get number of VA=PA entries; only really needed for jump - * to hyperspace; gets blown away immediately after - */ - - { - r1.l = #LO(_end); - r2.l = #LO(stext); - r3 = #1; - } - { - r1.h = #HI(_end); - r2.h = #HI(stext); - r3 = asl(r3, #22); - } - { - r1 = sub(r1, r2); - r3 = add(r3, #-1); - } /* r1 = _end - stext */ - r1 = add(r1, r3); /* + (4M-1) */ - r26 = lsr(r1, #22); /* / 4M = # of entries */ - - r1 = r25; - r2.h = #0xffc0; - r2.l = #0x0000; /* round back down to 4MB boundary */ - r1 = and(r1,r2); - r2 = lsr(r1, #22) /* 4MB page number */ - r2 = asl(r2, #2) /* times sizeof(PTE) (4bytes) */ - r0 = add(r0,r2) /* r0 = address of correct PTE */ - r2 = #PTE_BITS - r1 = add(r1,r2) /* r1 = 4MB PTE for the first entry */ - r2.h = #0x0040 - r2.l = #0x0000 /* 4MB increments */ - loop0(1f,r26); -1: - memw(r0 ++ #4) = r1 - { r1 = add(r1, r2); } :endloop0 - - /* Also need to overwrite the initial 0xc0000000 entries */ - /* PAGE_OFFSET >> (4MB shift - 4 bytes per entry shift) */ - R1.H = #HI(PAGE_OFFSET >> (22 - 2)) - R1.L = #LO(PAGE_OFFSET >> (22 - 2)) - - r0 = add(r1, r24); /* advance to 0xc0000000 entry */ - r1 = r25; - r2.h = #0xffc0; - r2.l = #0x0000; /* round back down to 4MB boundary */ - r1 = and(r1,r2); /* for huge page */ - r2 = #PTE_BITS - r1 = add(r1,r2); - r2.h = #0x0040 - r2.l = #0x0000 /* 4MB increments */ - - loop0(1f,SEGTABLE_ENTRIES); -1: - memw(r0 ++ #4) = r1; - { r1 = add(r1,r2); } :endloop0 - - r0 = r24; - - /* - * The subroutine wrapper around the virtual instruction touches - * no memory, so we should be able to use it even here. - * Note that in this version, R1 and R2 get "clobbered"; see - * vm_ops.S - */ - r1 = #VM_TRANS_TYPE_TABLE - call __vmnewmap; - - /* Jump into virtual address range. */ - - r31.h = #hi(__head_s_vaddr_target) - r31.l = #lo(__head_s_vaddr_target) - jumpr r31 - - /* Insert trippy space effects. */ - -__head_s_vaddr_target: - /* - * Tear down VA=PA translation now that we are running - * in kernel virtual space. - */ - r0 = #__HVM_PDE_S_INVALID - - r1.h = #0xffc0; - r1.l = #0x0000; - r2 = r25; /* phys_offset */ - r2 = and(r1,r2); - - r1.l = #lo(swapper_pg_dir) - r1.h = #hi(swapper_pg_dir) - r2 = lsr(r2, #22) /* 4MB page number */ - r2 = asl(r2, #2) /* times sizeof(PTE) (4bytes) */ - r1 = add(r1,r2); - loop0(1f,r26) - -1: - { - memw(R1 ++ #4) = R0 - }:endloop0 - - r0 = r24 - r1 = #VM_TRANS_TYPE_TABLE - call __vmnewmap - - /* Go ahead and install the trap0 return so angel calls work */ - r0.h = #hi(_K_provisional_vec) - r0.l = #lo(_K_provisional_vec) - call __vmsetvec - - /* - * OK, at this point we should start to be much more careful, - * we're going to enter C code and start touching memory - * in all sorts of places. - * This means: - * SGP needs to be OK - * Need to lock shared resources - * A bunch of other things that will cause - * all kinds of painful bugs - */ - - /* - * Stack pointer should be pointed at the init task's - * thread stack, which should have been declared in arch/init_task.c. - * So uhhhhh... - * It's accessible via the init_thread_union, which is a union - * of a thread_info struct and a stack; of course, the top - * of the stack is not for you. The end of the stack - * is simply init_thread_union + THREAD_SIZE. 
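The two packet assignments below compute, in C terms, nothing more than this (the symbol is an extern placeholder and THREAD_SIZE's value is whatever the config sets):

extern unsigned char init_thread_union[];	/* thread_info + stack block */

#define THREAD_SIZE_SK	16384			/* placeholder value */

static inline unsigned long initial_sp(void)
{
	/* r29 = init_thread_union + THREAD_SIZE: the first address past
	 * the block, since the stack grows down from its top. */
	return (unsigned long)init_thread_union + THREAD_SIZE_SK;
}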
- */ - - {r29.H = #HI(init_thread_union); r0.H = #HI(_THREAD_SIZE); } - {r29.L = #LO(init_thread_union); r0.L = #LO(_THREAD_SIZE); } - - /* initialize the register used to point to current_thread_info */ - /* Fixme: THREADINFO_REG can't be R2 because of that memset thing. */ - {r29 = add(r29,r0); THREADINFO_REG = r29; } - - /* Hack: zero bss; */ - { r0.L = #LO(__bss_start); r1 = #0; r2.l = #LO(__bss_stop); } - { r0.H = #HI(__bss_start); r2.h = #HI(__bss_stop); } - - r2 = sub(r2,r0); - call memset; - - /* Set PHYS_OFFSET; should be in R25 */ -#ifdef CONFIG_HEXAGON_PHYS_OFFSET - r0.l = #LO(__phys_offset); - r0.h = #HI(__phys_offset); - memw(r0) = r25; -#endif - - /* Time to make the doughnuts. */ - call start_kernel - - /* - * Should not reach here. - */ -1: - jump 1b - -.p2align PAGE_SHIFT -ENTRY(external_cmdline_buffer) - .fill _PAGE_SIZE,1,0 - -.data -.p2align PAGE_SHIFT -ENTRY(empty_zero_page) - .fill _PAGE_SIZE,1,0 diff --git a/arch/hexagon/kernel/trampoline.S b/arch/hexagon/kernel/trampoline.S deleted file mode 100644 index 58f631870f7e3ae780da2f16df052c4d4df533cc..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/trampoline.S +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * Trampoline sequences to be copied onto user stack. - * This consumes a little more space than hand-assembling - * immediate constants for use in C, but is more portable - * to future tweaks to the Hexagon instruction set. - */ - -#include - -/* Sig trampolines - call sys_sigreturn or sys_rt_sigreturn as appropriate */ - -/* plain sigreturn is gone. */ - - .globl __rt_sigtramp_template -__rt_sigtramp_template: - r6 = #__NR_rt_sigreturn; - trap0(#1); diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S deleted file mode 100644 index 4023fdbea4902e090e0798cf2c2f1006b1d2b259..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_entry.S +++ /dev/null @@ -1,380 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Event entry/exit for Hexagon - * - * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved. - */ - -#include /* assembly-safer versions of C defines */ -#include /* sigh, except for page_offset */ -#include -#include - -/* - * Entry into guest-mode Linux under Hexagon Virtual Machine. - * Stack pointer points to event record - build pt_regs on top of it, - * set up a plausible C stack frame, and dispatch to the C handler. - * On return, do vmrte virtual instruction with SP where we started. - * - * VM Spec 0.5 uses a trap to fetch HVM record now. - */ - -/* - * Save full register state, while setting up thread_info struct - * pointer derived from kernel stack pointer in THREADINFO_REG - * register, putting prior thread_info.regs pointer in a callee-save - * register (R24, which had better not ever be assigned to THREADINFO_REG), - * and updating thread_info.regs to point to current stack frame, - * so as to support nested events in kernel mode. - * - * As this is common code, we set the pt_regs system call number - * to -1 for all events. It will be replaced with the system call - * number in the case where we decode a system call (trap0(#1)). 
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define save_pt_regs()\ - memd(R0 + #_PT_R3130) = R31:30; \ - { memw(R0 + #_PT_R2928) = R28; \ - R31 = memw(R0 + #_PT_ER_VMPSP); }\ - { memw(R0 + #(_PT_R2928 + 4)) = R31; \ - R31 = ugp; } \ - { memd(R0 + #_PT_R2726) = R27:26; \ - R30 = gp ; } \ - memd(R0 + #_PT_R2524) = R25:24; \ - memd(R0 + #_PT_R2322) = R23:22; \ - memd(R0 + #_PT_R2120) = R21:20; \ - memd(R0 + #_PT_R1918) = R19:18; \ - memd(R0 + #_PT_R1716) = R17:16; \ - memd(R0 + #_PT_R1514) = R15:14; \ - memd(R0 + #_PT_R1312) = R13:12; \ - { memd(R0 + #_PT_R1110) = R11:10; \ - R15 = lc0; } \ - { memd(R0 + #_PT_R0908) = R9:8; \ - R14 = sa0; } \ - { memd(R0 + #_PT_R0706) = R7:6; \ - R13 = lc1; } \ - { memd(R0 + #_PT_R0504) = R5:4; \ - R12 = sa1; } \ - { memd(R0 + #_PT_GPUGP) = R31:30; \ - R11 = m1; \ - R2.H = #HI(_THREAD_SIZE); } \ - { memd(R0 + #_PT_LC0SA0) = R15:14; \ - R10 = m0; \ - R2.L = #LO(_THREAD_SIZE); } \ - { memd(R0 + #_PT_LC1SA1) = R13:12; \ - R15 = p3:0; \ - R2 = neg(R2); } \ - { memd(R0 + #_PT_M1M0) = R11:10; \ - R14 = usr; \ - R2 = and(R0,R2); } \ - { memd(R0 + #_PT_PREDSUSR) = R15:14; \ - THREADINFO_REG = R2; } \ - { r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \ - memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \ - R2 = #-1; } \ - { memw(R0 + #_PT_SYSCALL_NR) = R2; \ - R30 = #0; } -#else -/* V4+ */ -/* the # ## # syntax inserts a literal ## */ -#define save_pt_regs()\ - { memd(R0 + #_PT_R3130) = R31:30; \ - R30 = memw(R0 + #_PT_ER_VMPSP); }\ - { memw(R0 + #_PT_R2928) = R28; \ - memw(R0 + #(_PT_R2928 + 4)) = R30; }\ - { R31:30 = C11:10; \ - memd(R0 + #_PT_R2726) = R27:26; \ - memd(R0 + #_PT_R2524) = R25:24; }\ - { memd(R0 + #_PT_R2322) = R23:22; \ - memd(R0 + #_PT_R2120) = R21:20; }\ - { memd(R0 + #_PT_R1918) = R19:18; \ - memd(R0 + #_PT_R1716) = R17:16; }\ - { memd(R0 + #_PT_R1514) = R15:14; \ - memd(R0 + #_PT_R1312) = R13:12; \ - R17:16 = C13:12; }\ - { memd(R0 + #_PT_R1110) = R11:10; \ - memd(R0 + #_PT_R0908) = R9:8; \ - R15:14 = C1:0; } \ - { memd(R0 + #_PT_R0706) = R7:6; \ - memd(R0 + #_PT_R0504) = R5:4; \ - R13:12 = C3:2; } \ - { memd(R0 + #_PT_GPUGP) = R31:30; \ - memd(R0 + #_PT_LC0SA0) = R15:14; \ - R11:10 = C7:6; }\ - { THREADINFO_REG = and(R0, # ## #-_THREAD_SIZE); \ - memd(R0 + #_PT_LC1SA1) = R13:12; \ - R15 = p3:0; }\ - { memd(R0 + #_PT_M1M0) = R11:10; \ - memw(R0 + #_PT_PREDSUSR + 4) = R15; }\ - { r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \ - memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \ - R2 = #-1; } \ - { memw(R0 + #_PT_SYSCALL_NR) = R2; \ - memd(R0 + #_PT_CS1CS0) = R17:16; \ - R30 = #0; } -#endif - -/* - * Restore registers and thread_info.regs state. THREADINFO_REG - * is assumed to still be sane, and R24 to have been correctly - * preserved. Don't restore R29 (SP) until later. 
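Both save_pt_regs variants above derive THREADINFO_REG with a single and(R0, #-_THREAD_SIZE). The trick, which the csky entry code's bmaski/andn pairs use as well: kernel stacks are THREAD_SIZE-sized and THREAD_SIZE-aligned, so masking the low bits off any address inside the stack yields the thread_info at its base. In C (the size and struct layout are placeholders):

#include <stdint.h>

#define THREAD_SIZE_SK	16384u		/* must be a power of two */

struct thread_info_sk {
	void *pt_regs;			/* _THREAD_INFO_PT_REGS */
	unsigned long flags;		/* _THREAD_INFO_FLAGS */
};

static inline struct thread_info_sk *ti_from_sp(uintptr_t sp)
{
	/* and(R0, # ## #-_THREAD_SIZE) in save_pt_regs() */
	return (struct thread_info_sk *)(sp & ~(uintptr_t)(THREAD_SIZE_SK - 1));
}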
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define restore_pt_regs() \ - { memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \ - R15:14 = memd(R0 + #_PT_PREDSUSR); } \ - { R11:10 = memd(R0 + #_PT_M1M0); \ - p3:0 = R15; } \ - { R13:12 = memd(R0 + #_PT_LC1SA1); \ - usr = R14; } \ - { R15:14 = memd(R0 + #_PT_LC0SA0); \ - m1 = R11; } \ - { R3:2 = memd(R0 + #_PT_R0302); \ - m0 = R10; } \ - { R5:4 = memd(R0 + #_PT_R0504); \ - lc1 = R13; } \ - { R7:6 = memd(R0 + #_PT_R0706); \ - sa1 = R12; } \ - { R9:8 = memd(R0 + #_PT_R0908); \ - lc0 = R15; } \ - { R11:10 = memd(R0 + #_PT_R1110); \ - sa0 = R14; } \ - { R13:12 = memd(R0 + #_PT_R1312); \ - R15:14 = memd(R0 + #_PT_R1514); } \ - { R17:16 = memd(R0 + #_PT_R1716); \ - R19:18 = memd(R0 + #_PT_R1918); } \ - { R21:20 = memd(R0 + #_PT_R2120); \ - R23:22 = memd(R0 + #_PT_R2322); } \ - { R25:24 = memd(R0 + #_PT_R2524); \ - R27:26 = memd(R0 + #_PT_R2726); } \ - R31:30 = memd(R0 + #_PT_GPUGP); \ - { R28 = memw(R0 + #_PT_R2928); \ - ugp = R31; } \ - { R31:30 = memd(R0 + #_PT_R3130); \ - gp = R30; } -#else -/* V4+ */ -#define restore_pt_regs() \ - { memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \ - R15:14 = memd(R0 + #_PT_PREDSUSR); } \ - { R11:10 = memd(R0 + #_PT_M1M0); \ - R13:12 = memd(R0 + #_PT_LC1SA1); \ - p3:0 = R15; } \ - { R15:14 = memd(R0 + #_PT_LC0SA0); \ - R3:2 = memd(R0 + #_PT_R0302); \ - usr = R14; } \ - { R5:4 = memd(R0 + #_PT_R0504); \ - R7:6 = memd(R0 + #_PT_R0706); \ - C7:6 = R11:10; }\ - { R9:8 = memd(R0 + #_PT_R0908); \ - R11:10 = memd(R0 + #_PT_R1110); \ - C3:2 = R13:12; }\ - { R13:12 = memd(R0 + #_PT_R1312); \ - R15:14 = memd(R0 + #_PT_R1514); \ - C1:0 = R15:14; }\ - { R17:16 = memd(R0 + #_PT_R1716); \ - R19:18 = memd(R0 + #_PT_R1918); } \ - { R21:20 = memd(R0 + #_PT_R2120); \ - R23:22 = memd(R0 + #_PT_R2322); } \ - { R25:24 = memd(R0 + #_PT_R2524); \ - R27:26 = memd(R0 + #_PT_R2726); } \ - R31:30 = memd(R0 + #_PT_CS1CS0); \ - { C13:12 = R31:30; \ - R31:30 = memd(R0 + #_PT_GPUGP) ; \ - R28 = memw(R0 + #_PT_R2928); }\ - { C11:10 = R31:30; \ - R31:30 = memd(R0 + #_PT_R3130); } -#endif - - /* - * Clears off enough space for the rest of pt_regs; evrec is a part - * of pt_regs in HVM mode. Save R0/R1, set handler's address in R1. - * R0 is the address of pt_regs and is the parameter to save_pt_regs. - */ - -/* - * Since the HVM isn't automagically pushing the EVREC onto the stack anymore, - * we'll subract the entire size out and then fill it in ourselves. - * Need to save off R0, R1, R2, R3 immediately. 
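One subtlety in the vm_event_entry() macros that follow: the store of R1:0 shares an instruction packet with the stack-pointer decrement, and Hexagon packet semantics have every instruction read the pre-packet register values (absent .new forms). The store therefore sees the old R29 and must fold the decrement into its displacement, which is why it is written as #(_PT_R0100 + -_PT_REGS_SIZE). A sequential C equivalent, with illustrative stand-ins for the asm-offsets values:

    #include <stdint.h>
    #include <string.h>

    #define PT_REGS_SIZE 256             /* illustrative */
    #define PT_R0100     0               /* illustrative offset of R1:0 */

    static void reserve_and_save(uint8_t **sp, uint64_t r1r0)
    {
        uint8_t *old_sp = *sp;               /* value the packet reads  */
        *sp = old_sp - PT_REGS_SIZE;         /* R29 = add(R29, #-SIZE)  */
        /* new_sp + PT_R0100 == old_sp + (PT_R0100 - PT_REGS_SIZE) */
        memcpy(old_sp + PT_R0100 - PT_REGS_SIZE, &r1r0, sizeof(r1r0));
    }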
- */ - -#if CONFIG_HEXAGON_ARCH_VERSION < 4 -#define vm_event_entry(CHandler) \ - { \ - R29 = add(R29, #-(_PT_REGS_SIZE)); \ - memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \ - } \ - { \ - memd(R29 +#_PT_R0302) = R3:2; \ - } \ - trap1(#HVM_TRAP1_VMGETREGS); \ - { \ - memd(R29 + #_PT_ER_VMEL) = R1:0; \ - R0 = R29; \ - R1.L = #LO(CHandler); \ - } \ - { \ - memd(R29 + #_PT_ER_VMPSP) = R3:2; \ - R1.H = #HI(CHandler); \ - jump event_dispatch; \ - } -#else -/* V4+ */ -/* turn on I$ prefetch early */ -/* the # ## # syntax inserts a literal ## */ -#define vm_event_entry(CHandler) \ - { \ - R29 = add(R29, #-(_PT_REGS_SIZE)); \ - memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \ - memd(R29 + #(_PT_R0302 + -_PT_REGS_SIZE)) = R3:2; \ - R0 = usr; \ - } \ - { \ - memw(R29 + #_PT_PREDSUSR) = R0; \ - R0 = setbit(R0, #16); \ - } \ - usr = R0; \ - R1:0 = G1:0; \ - { \ - memd(R29 + #_PT_ER_VMEL) = R1:0; \ - R1 = # ## #(CHandler); \ - R3:2 = G3:2; \ - } \ - { \ - R0 = R29; \ - memd(R29 + #_PT_ER_VMPSP) = R3:2; \ - jump event_dispatch; \ - } -#endif - -.text - /* - * Do bulk save/restore in one place. - * Adds a jump to dispatch latency, but - * saves hundreds of bytes. - */ - -event_dispatch: - save_pt_regs() - callr r1 - - /* - * Coming back from the C-world, our thread info pointer - * should be in the designated register (usually R19) - * - * If we were in kernel mode, we don't need to check scheduler - * or signals if CONFIG_PREEMPT is not set. If set, then it has - * to jump to a need_resched kind of block. - * BTW, CONFIG_PREEMPT is not supported yet. - */ - -#ifdef CONFIG_PREEMPT - R0 = #VM_INT_DISABLE - trap1(#HVM_TRAP1_VMSETIE) -#endif - - /* "Nested control path" -- if the previous mode was kernel */ - { - R0 = memw(R29 + #_PT_ER_VMEST); - R26.L = #LO(do_work_pending); - } - { - P0 = tstbit(R0, #HVM_VMEST_UM_SFT); - if (!P0.new) jump:nt restore_all; - R26.H = #HI(do_work_pending); - R0 = #VM_INT_DISABLE; - } - - /* - * Check also the return from fork/system call, normally coming back from - * user mode - * - * R26 needs to have do_work_pending, and R0 should have VM_INT_DISABLE - */ - -check_work_pending: - /* Disable interrupts while checking TIF */ - trap1(#HVM_TRAP1_VMSETIE) - { - R0 = R29; /* regs should still be at top of stack */ - R1 = memw(THREADINFO_REG + #_THREAD_INFO_FLAGS); - callr R26; - } - - { - P0 = cmp.eq(R0, #0); if (!P0.new) jump:nt check_work_pending; - R0 = #VM_INT_DISABLE; - } - -restore_all: - /* - * Disable interrupts, if they weren't already, before reg restore. - * R0 gets preloaded with #VM_INT_DISABLE before we get here. 
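The check_work_pending loop above has a simple C shape: disable interrupts, sample the TIF flags, and keep calling the work handler until it reports that nothing was done (handling work re-enables interrupts, so new work may arrive in between). A sketch with hypothetical helper names standing in for the trap1 sequences and the thread_info load:

    struct pt_regs;

    /* Hypothetical helpers; the assembly uses trap1(#HVM_TRAP1_VMSETIE)
     * and a load from THREADINFO_REG respectively. */
    extern void vm_int_disable(void);
    extern unsigned long current_tif_flags(void);
    extern int do_work_pending(struct pt_regs *regs, unsigned long flags);

    static void exit_to_user(struct pt_regs *regs)
    {
        for (;;) {
            vm_int_disable();
            if (!do_work_pending(regs, current_tif_flags()))
                break;              /* nothing done: go to restore_all */
            /* nonzero return: work was handled with interrupts enabled,
             * so re-check before leaving the kernel. */
        }
    }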
- */ - trap1(#HVM_TRAP1_VMSETIE) - - /* do the setregs here for VM 0.5 */ - /* R29 here should already be pointing at pt_regs */ - { - R1:0 = memd(R29 + #_PT_ER_VMEL); - R3:2 = memd(R29 + #_PT_ER_VMPSP); - } -#if CONFIG_HEXAGON_ARCH_VERSION < 4 - trap1(#HVM_TRAP1_VMSETREGS); -#else - G1:0 = R1:0; - G3:2 = R3:2; -#endif - - R0 = R29 - restore_pt_regs() - { - R1:0 = memd(R29 + #_PT_R0100); - R29 = add(R29, #_PT_REGS_SIZE); - } - trap1(#HVM_TRAP1_VMRTE) - /* Notreached */ - - - .globl _K_enter_genex -_K_enter_genex: - vm_event_entry(do_genex) - - .globl _K_enter_interrupt -_K_enter_interrupt: - vm_event_entry(arch_do_IRQ) - - .globl _K_enter_trap0 -_K_enter_trap0: - vm_event_entry(do_trap0) - - .globl _K_enter_machcheck -_K_enter_machcheck: - vm_event_entry(do_machcheck) - - .globl _K_enter_debug -_K_enter_debug: - vm_event_entry(do_debug_exception) - - .globl ret_from_fork -ret_from_fork: - { - call schedule_tail - R26.H = #HI(do_work_pending); - } - { - P0 = cmp.eq(R24, #0); - R26.L = #LO(do_work_pending); - R0 = #VM_INT_DISABLE; - } - if (P0) jump check_work_pending - { - R0 = R25; - callr R24 - } - { - jump check_work_pending - R0 = #VM_INT_DISABLE; - } diff --git a/arch/hexagon/kernel/vm_init_segtable.S b/arch/hexagon/kernel/vm_init_segtable.S deleted file mode 100644 index 2638a090636101b7c5cd63193c85c3d39060197c..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_init_segtable.S +++ /dev/null @@ -1,429 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Initial page table for Linux kernel under Hexagon VM, - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * These tables are pre-computed and linked into kernel. - */ - -#include -/* #include */ - -/* - * Start with mapping PA=0 to both VA=0x0 and VA=0xc000000 as 16MB large pages. - * No user mode access, RWX, write-back cache. The entry needs - * to be replicated for all 4 virtual segments mapping to the page. 
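The BKP() macro defined below packs one page-directory entry: the physical address masked to the block boundary, OR'd with read/write/execute permission bits, a cacheability code shifted up to bit 6, and the block-size field. The same packing in C, with made-up bit positions standing in for the HVM header constants:

    #include <stdint.h>

    #define PDE_S_16MB  6u               /* size code in bits 2:0 (example)   */
    #define PTE_R       (1u << 3)        /* illustrative permission layout    */
    #define PTE_W       (1u << 4)
    #define PTE_X       (1u << 5)
    #define C_WB_L2     7u               /* write-back, L2 cacheable (example)*/
    #define PGMASK_4MB  0xffc00000u

    static inline uint32_t big_kernel_page(uint32_t pa)
    {
        return (pa & PGMASK_4MB) | PTE_R | PTE_W | PTE_X
               | (C_WB_L2 << 6) | PDE_S_16MB;
    }

One visible consequence of the replication rule stated above: the UART_PTE_ENTRY row further down repeats a single 16MB device mapping across four consecutive 4MB table slots.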
- */ - -/* "Big Kernel Page" */ -#define BKP(pa) (((pa) & __HVM_PTE_PGMASK_4MB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_WB_L2 << 6 \ - | __HVM_PDE_S_16MB) - -/* No cache version */ - -#define BKPG_IO(pa) (((pa) & __HVM_PTE_PGMASK_16MB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HVM_PDE_S_16MB | __HEXAGON_C_DEV << 6 ) - -#define FOURK_IO(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \ - | __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \ - | __HEXAGON_C_DEV << 6 ) - -#define L2_PTR(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \ - | __HVM_PDE_S_4KB ) - -#define X __HVM_PDE_S_INVALID - - .p2align 12 - .globl swapper_pg_dir - .globl _K_init_segtable -swapper_pg_dir: -/* VA 0x00000000 */ - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/* VA 0x40000000 */ - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/* VA 0x80000000 */ - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -/*0xa8*/.word X,X,X,X -#ifdef CONFIG_COMET_EARLY_UART_DEBUG -UART_PTE_ENTRY: -/*0xa9*/.word BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000) -#else -/*0xa9*/.word X,X,X,X -#endif -/*0xaa*/.word X,X,X,X -/*0xab*/.word X,X,X,X -/*0xac*/.word X,X,X,X -/*0xad*/.word X,X,X,X -/*0xae*/.word X,X,X,X -/*0xaf*/.word X,X,X,X -/*0xb0*/.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X - .word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X -_K_init_segtable: -/* VA 0xC0000000 */ - .word BKP(0x00000000), BKP(0x00400000), BKP(0x00800000), BKP(0x00c00000) - .word BKP(0x01000000), BKP(0x01400000), BKP(0x01800000), BKP(0x01c00000) - .word BKP(0x02000000), BKP(0x02400000), BKP(0x02800000), BKP(0x02c00000) - .word BKP(0x03000000), BKP(0x03400000), BKP(0x03800000), BKP(0x03c00000) - .word BKP(0x04000000), BKP(0x04400000), BKP(0x04800000), BKP(0x04c00000) - .word BKP(0x05000000), BKP(0x05400000), BKP(0x05800000), BKP(0x05c00000) - .word BKP(0x06000000), BKP(0x06400000), BKP(0x06800000), BKP(0x06c00000) - .word BKP(0x07000000), BKP(0x07400000), BKP(0x07800000), BKP(0x07c00000) - - .word BKP(0x08000000), BKP(0x08400000), BKP(0x08800000), BKP(0x08c00000) - .word BKP(0x09000000), BKP(0x09400000), 
BKP(0x09800000), BKP(0x09c00000) - .word BKP(0x0a000000), BKP(0x0a400000), BKP(0x0a800000), BKP(0x0ac00000) - .word BKP(0x0b000000), BKP(0x0b400000), BKP(0x0b800000), BKP(0x0bc00000) - .word BKP(0x0c000000), BKP(0x0c400000), BKP(0x0c800000), BKP(0x0cc00000) - .word BKP(0x0d000000), BKP(0x0d400000), BKP(0x0d800000), BKP(0x0dc00000) - .word BKP(0x0e000000), BKP(0x0e400000), BKP(0x0e800000), BKP(0x0ec00000) - .word BKP(0x0f000000), BKP(0x0f400000), BKP(0x0f800000), BKP(0x0fc00000) - - .word BKP(0x10000000), BKP(0x10400000), BKP(0x10800000), BKP(0x10c00000) - .word BKP(0x11000000), BKP(0x11400000), BKP(0x11800000), BKP(0x11c00000) - .word BKP(0x12000000), BKP(0x12400000), BKP(0x12800000), BKP(0x12c00000) - .word BKP(0x13000000), BKP(0x13400000), BKP(0x13800000), BKP(0x13c00000) - .word BKP(0x14000000), BKP(0x14400000), BKP(0x14800000), BKP(0x14c00000) - .word BKP(0x15000000), BKP(0x15400000), BKP(0x15800000), BKP(0x15c00000) - .word BKP(0x16000000), BKP(0x16400000), BKP(0x16800000), BKP(0x16c00000) - .word BKP(0x17000000), BKP(0x17400000), BKP(0x17800000), BKP(0x17c00000) - - .word BKP(0x18000000), BKP(0x18400000), BKP(0x18800000), BKP(0x18c00000) - .word BKP(0x19000000), BKP(0x19400000), BKP(0x19800000), BKP(0x19c00000) - .word BKP(0x1a000000), BKP(0x1a400000), BKP(0x1a800000), BKP(0x1ac00000) - .word BKP(0x1b000000), BKP(0x1b400000), BKP(0x1b800000), BKP(0x1bc00000) - .word BKP(0x1c000000), BKP(0x1c400000), BKP(0x1c800000), BKP(0x1cc00000) - .word BKP(0x1d000000), BKP(0x1d400000), BKP(0x1d800000), BKP(0x1dc00000) - .word BKP(0x1e000000), BKP(0x1e400000), BKP(0x1e800000), BKP(0x1ec00000) - .word BKP(0x1f000000), BKP(0x1f400000), BKP(0x1f800000), BKP(0x1fc00000) - - .word BKP(0x20000000), BKP(0x20400000), BKP(0x20800000), BKP(0x20c00000) - .word BKP(0x21000000), BKP(0x21400000), BKP(0x21800000), BKP(0x21c00000) - .word BKP(0x22000000), BKP(0x22400000), BKP(0x22800000), BKP(0x22c00000) - .word BKP(0x23000000), BKP(0x23400000), BKP(0x23800000), BKP(0x23c00000) - .word BKP(0x24000000), BKP(0x24400000), BKP(0x24800000), BKP(0x24c00000) - .word BKP(0x25000000), BKP(0x25400000), BKP(0x25800000), BKP(0x25c00000) - .word BKP(0x26000000), BKP(0x26400000), BKP(0x26800000), BKP(0x26c00000) - .word BKP(0x27000000), BKP(0x27400000), BKP(0x27800000), BKP(0x27c00000) - - .word BKP(0x28000000), BKP(0x28400000), BKP(0x28800000), BKP(0x28c00000) - .word BKP(0x29000000), BKP(0x29400000), BKP(0x29800000), BKP(0x29c00000) - .word BKP(0x2a000000), BKP(0x2a400000), BKP(0x2a800000), BKP(0x2ac00000) - .word BKP(0x2b000000), BKP(0x2b400000), BKP(0x2b800000), BKP(0x2bc00000) - .word BKP(0x2c000000), BKP(0x2c400000), BKP(0x2c800000), BKP(0x2cc00000) - .word BKP(0x2d000000), BKP(0x2d400000), BKP(0x2d800000), BKP(0x2dc00000) - .word BKP(0x2e000000), BKP(0x2e400000), BKP(0x2e800000), BKP(0x2ec00000) - .word BKP(0x2f000000), BKP(0x2f400000), BKP(0x2f800000), BKP(0x2fc00000) - - .word BKP(0x30000000), BKP(0x30400000), BKP(0x30800000), BKP(0x30c00000) - .word BKP(0x31000000), BKP(0x31400000), BKP(0x31800000), BKP(0x31c00000) - .word BKP(0x32000000), BKP(0x32400000), BKP(0x32800000), BKP(0x32c00000) - .word BKP(0x33000000), BKP(0x33400000), BKP(0x33800000), BKP(0x33c00000) - .word BKP(0x34000000), BKP(0x34400000), BKP(0x34800000), BKP(0x34c00000) - .word BKP(0x35000000), BKP(0x35400000), BKP(0x35800000), BKP(0x35c00000) - .word BKP(0x36000000), BKP(0x36400000), BKP(0x36800000), BKP(0x36c00000) - .word BKP(0x37000000), BKP(0x37400000), BKP(0x37800000), BKP(0x37c00000) - - .word BKP(0x38000000), BKP(0x38400000), BKP(0x38800000), 
BKP(0x38c00000) - .word BKP(0x39000000), BKP(0x39400000), BKP(0x39800000), BKP(0x39c00000) - .word BKP(0x3a000000), BKP(0x3a400000), BKP(0x3a800000), BKP(0x3ac00000) - .word BKP(0x3b000000), BKP(0x3b400000), BKP(0x3b800000), BKP(0x3bc00000) - .word BKP(0x3c000000), BKP(0x3c400000), BKP(0x3c800000), BKP(0x3cc00000) - .word BKP(0x3d000000), BKP(0x3d400000), BKP(0x3d800000), BKP(0x3dc00000) -_K_io_map: - .word X,X,X,X /* 0x3e000000 - device IO early remap */ - .word X,X,X,X /* 0x3f000000 - hypervisor space*/ - -#if 0 -/* - * This is in here as an example for devices which need to be mapped really - * early. - */ - .p2align 12 - .globl _K_io_kmap - .globl _K_init_devicetable -_K_init_devicetable: /* Should be 4MB worth of entries */ - .word FOURK_IO(MSM_GPIO1_PHYS),FOURK_IO(MSM_GPIO2_PHYS),FOURK_IO(MSM_SIRC_PHYS),X - .word FOURK_IO(TLMM_GPIO1_PHYS),X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - 
.word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X - .word X,X,X,X -#endif diff --git a/arch/hexagon/kernel/vm_ops.S b/arch/hexagon/kernel/vm_ops.S deleted file mode 100644 index f61c04d485f6f9111a89048960c14029b909c321..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_ops.S +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Hexagon VM instruction support - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#include -#include - -/* - * C wrappers for virtual machine "instructions". These - * could be, and perhaps some day will be, handled as in-line - * macros, but for tracing/debugging it's handy to have - * a single point of invocation for each of them. - * Conveniently, they take parameters and return values - * consistent with the ABI calling convention. 
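Because the trap wrappers that follow preserve the standard calling convention, C code can declare them as ordinary functions: arguments go in where the caller put them, and the virtual machine's result comes back in R0. Hypothetical prototypes for a few of them (the exact signatures are not spelled out in this file and are assumptions here):

    /* Hypothetical prototypes; each wrapper is a single trap1 plus
     * return, so it behaves like a normal leaf function. */
    long __vmgetie(void);                /* query interrupt-enable state   */
    long __vmsetie(long enable);         /* set it; prior state in return  */
    long __vmwait(void);                 /* idle until the next interrupt  */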
- */ - -ENTRY(__vmrte) - trap1(#HVM_TRAP1_VMRTE); - jumpr R31; - -ENTRY(__vmsetvec) - trap1(#HVM_TRAP1_VMSETVEC); - jumpr R31; - -ENTRY(__vmsetie) - trap1(#HVM_TRAP1_VMSETIE); - jumpr R31; - -ENTRY(__vmgetie) - trap1(#HVM_TRAP1_VMGETIE); - jumpr R31; - -ENTRY(__vmintop) - trap1(#HVM_TRAP1_VMINTOP); - jumpr R31; - -ENTRY(__vmclrmap) - trap1(#HVM_TRAP1_VMCLRMAP); - jumpr R31; - -ENTRY(__vmnewmap) - r1 = #VM_NEWMAP_TYPE_PGTABLES; - trap1(#HVM_TRAP1_VMNEWMAP); - jumpr R31; - -ENTRY(__vmcache) - trap1(#HVM_TRAP1_VMCACHE); - jumpr R31; - -ENTRY(__vmgettime) - trap1(#HVM_TRAP1_VMGETTIME); - jumpr R31; - -ENTRY(__vmsettime) - trap1(#HVM_TRAP1_VMSETTIME); - jumpr R31; - -ENTRY(__vmwait) - trap1(#HVM_TRAP1_VMWAIT); - jumpr R31; - -ENTRY(__vmyield) - trap1(#HVM_TRAP1_VMYIELD); - jumpr R31; - -ENTRY(__vmstart) - trap1(#HVM_TRAP1_VMSTART); - jumpr R31; - -ENTRY(__vmstop) - trap1(#HVM_TRAP1_VMSTOP); - jumpr R31; - -ENTRY(__vmvpid) - trap1(#HVM_TRAP1_VMVPID); - jumpr R31; - -/* Probably not actually going to use these; see vm_entry.S */ - -ENTRY(__vmsetregs) - trap1(#HVM_TRAP1_VMSETREGS); - jumpr R31; - -ENTRY(__vmgetregs) - trap1(#HVM_TRAP1_VMGETREGS); - jumpr R31; diff --git a/arch/hexagon/kernel/vm_switch.S b/arch/hexagon/kernel/vm_switch.S deleted file mode 100644 index 5ec2d43fee1f94a3931a87a9fada10e6918c40e6..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_switch.S +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Context switch support for Hexagon - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#include - -.text - -/* - * The register used as a fast-path thread information pointer - * is determined as a kernel configuration option. If it happens - * to be a callee-save register, we're going to be saving and - * restoring it twice here. - * - * This code anticipates a revised ABI where R20-23 are added - * to the set of callee-save registers, but this should be - * backward compatible to legacy tools. - */ - - -/* - * void switch_to(struct task_struct *prev, - * struct task_struct *next, struct task_struct *last); - */ - .p2align 2 - .globl __switch_to - .type __switch_to, @function - -/* - * When we exit the wormhole, we need to store the previous task - * in the new R0's pointer. Technically it should be R2, but they should - * be the same; seems like a legacy thing. In short, don't butcher - * R0, let it go back out unmolested. - */ - -__switch_to: - /* - * Push callee-saves onto "prev" stack. - * Here, we're sneaky because the LR and FP - * storage of the thread_stack structure - * is automagically allocated by allocframe, - * so we pass struct size less 8. 
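At the C level, the __switch_to sequence below is a stack swap with bookkeeping: push callee-saves on the outgoing stack, record that stack pointer in the outgoing thread_struct, adopt the incoming thread's saved stack pointer, and pop its callee-saves, reloading THREADINFO_REG last since it may itself be one of the saved registers. A structural sketch with hypothetical helpers standing in for the allocframe/memd save and restore:

    struct thread_struct { void *switch_sp; };
    struct task_struct {
        void *thread_info;
        struct thread_struct thread;
    };

    /* Hypothetical: stand-ins for the register save/restore done in
     * hand-written assembly. */
    extern void *push_callee_saves(void);
    extern void pop_callee_saves(void *sp);

    static void switch_stacks(struct task_struct *prev,
                              struct task_struct *next)
    {
        prev->thread.switch_sp = push_callee_saves();
        pop_callee_saves(next->thread.switch_sp);
        /* execution now continues on next's kernel stack */
    }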
- */ - allocframe(#(_SWITCH_STACK_SIZE - 8)); - memd(R29+#(_SWITCH_R2726))=R27:26; - memd(R29+#(_SWITCH_R2524))=R25:24; - memd(R29+#(_SWITCH_R2322))=R23:22; - memd(R29+#(_SWITCH_R2120))=R21:20; - memd(R29+#(_SWITCH_R1918))=R19:18; - memd(R29+#(_SWITCH_R1716))=R17:16; - /* Stash thread_info pointer in task_struct */ - memw(R0+#_TASK_THREAD_INFO) = THREADINFO_REG; - memw(R0 +#(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP)) = R29; - /* Switch to "next" stack and restore callee saves from there */ - R29 = memw(R1 + #(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP)); - { - R27:26 = memd(R29+#(_SWITCH_R2726)); - R25:24 = memd(R29+#(_SWITCH_R2524)); - } - { - R23:22 = memd(R29+#(_SWITCH_R2322)); - R21:20 = memd(R29+#(_SWITCH_R2120)); - } - { - R19:18 = memd(R29+#(_SWITCH_R1918)); - R17:16 = memd(R29+#(_SWITCH_R1716)); - } - { - /* THREADINFO_REG is currently one of the callee-saved regs - * above, and so be sure to re-load it last. - */ - THREADINFO_REG = memw(R1 + #_TASK_THREAD_INFO); - R31:30 = memd(R29+#_SWITCH_FP); - } - { - R29 = add(R29,#_SWITCH_STACK_SIZE); - jumpr R31; - } - .size __switch_to, .-__switch_to diff --git a/arch/hexagon/kernel/vm_vectors.S b/arch/hexagon/kernel/vm_vectors.S deleted file mode 100644 index fba33745ce579c4071730c181f76345f15c21724..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vm_vectors.S +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Event jump tables - * - * Copyright (c) 2010-2012,2013, The Linux Foundation. All rights reserved. - */ - -#include - -.text - -/* This is registered early on to allow angel */ -.global _K_provisional_vec -_K_provisional_vec: - jump 1f; - jump 1f; - jump 1f; - jump 1f; - jump 1f; - trap1(#HVM_TRAP1_VMRTE) - jump 1f; - jump 1f; - - -.global _K_VM_event_vector -_K_VM_event_vector: -1: - jump 1b; /* Reset */ - jump _K_enter_machcheck; - jump _K_enter_genex; - jump _K_enter_debug; - jump 1b; /* 4 Rsvd */ - jump _K_enter_trap0; - jump 1b; /* 6 Rsvd */ - jump _K_enter_interrupt; diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S deleted file mode 100644 index 78f2418e97c8425dc2898d5d1fee76b4da30f708..0000000000000000000000000000000000000000 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Linker script for Hexagon kernel - * - * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved. - */ - -#include -#include /* Most of the kernel defines are here */ -#include /* except for page_offset */ -#include /* and now we're pulling cache line size */ -#include /* and we need THREAD_SIZE too */ - -OUTPUT_ARCH(hexagon) -ENTRY(stext) - -jiffies = jiffies_64; - -/* -See asm-generic/vmlinux.lds.h for expansion of some of these macros. -See asm-generic/sections.h for seemingly required labels. -*/ - -#define PAGE_SIZE _PAGE_SIZE - -SECTIONS -{ - . = PAGE_OFFSET; - - __init_begin = .; - HEAD_TEXT_SECTION - INIT_TEXT_SECTION(PAGE_SIZE) - PERCPU_SECTION(L1_CACHE_BYTES) - __init_end = .; - - . 
= ALIGN(_PAGE_SIZE); - _stext = .; - .text : AT(ADDR(.text)) { - _text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - } - _etext = .; - - INIT_DATA_SECTION(PAGE_SIZE) - - _sdata = .; - RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE) - RO_DATA_SECTION(PAGE_SIZE) - _edata = .; - - EXCEPTION_TABLE(16) - NOTES - - BSS_SECTION(_PAGE_SIZE, _PAGE_SIZE, _PAGE_SIZE) - - _end = .; - - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - EXIT_CALL - } - - STABS_DEBUG - DWARF_DEBUG - -} diff --git a/arch/hexagon/lib/memcpy.S b/arch/hexagon/lib/memcpy.S deleted file mode 100644 index f8b3c02a8ad1bdb154ec9b82b9cabd57f72256ed..0000000000000000000000000000000000000000 --- a/arch/hexagon/lib/memcpy.S +++ /dev/null @@ -1,529 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * Description - * - * library function for memcpy where length bytes are copied from - * ptr_in to ptr_out. ptr_out is returned unchanged. - * Allows any combination of alignment on input and output pointers - * and length from 0 to 2^32-1 - * - * Restrictions - * The arrays should not overlap, the program will produce undefined output - * if they do. - * For blocks less than 16 bytes a byte by byte copy is performed. For - * 8byte alignments, and length multiples, a dword copy is performed up to - * 96bytes - * History - * - * DJH 5/15/09 Initial version 1.0 - * DJH 6/ 1/09 Version 1.1 modified ABI to inlcude R16-R19 - * DJH 7/12/09 Version 1.2 optimized codesize down to 760 was 840 - * DJH 10/14/09 Version 1.3 added special loop for aligned case, was - * overreading bloated codesize back up to 892 - * DJH 4/20/10 Version 1.4 fixed Ldword_loop_epilog loop to prevent loads - * occurring if only 1 left outstanding, fixes bug - * # 3888, corrected for all alignments. Peeled off - * 1 32byte chunk from kernel loop and extended 8byte - * loop at end to solve all combinations and prevent - * over read. Fixed Ldword_loop_prolog to prevent - * overread for blocks less than 48bytes. Reduced - * codesize to 752 bytes - * DJH 4/21/10 version 1.5 1.4 fix broke code for input block ends not - * aligned to dword boundaries,underwriting by 1 - * byte, added detection for this and fixed. A - * little bloat. 
- * DJH 4/23/10 version 1.6 corrected stack error, R20 was not being restored - * always, fixed the error of R20 being modified - * before it was being saved - * Natural c model - * =============== - * void * memcpy(char * ptr_out, char * ptr_in, int length) { - * int i; - * if(length) for(i=0; i < length; i++) { ptr_out[i] = ptr_in[i]; } - * return(ptr_out); - * } - * - * Optimized memcpy function - * ========================= - * void * memcpy(char * ptr_out, char * ptr_in, int len) { - * int i, prolog, kernel, epilog, mask; - * u8 offset; - * s64 data0, dataF8, data70; - * - * s64 * ptr8_in; - * s64 * ptr8_out; - * s32 * ptr4; - * s16 * ptr2; - * - * offset = ((int) ptr_in) & 7; - * ptr8_in = (s64 *) &ptr_in[-offset]; //read in the aligned pointers - * - * data70 = *ptr8_in++; - * dataF8 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * - * prolog = 32 - ((int) ptr_out); - * mask = 0x7fffffff >> HEXAGON_R_cl0_R(len); - * prolog = prolog & mask; - * kernel = len - prolog; - * epilog = kernel & 0x1F; - * kernel = kernel>>5; - * - * if (prolog & 1) { ptr_out[0] = (u8) data0; data0 >>= 8; ptr_out += 1;} - * ptr2 = (s16 *) &ptr_out[0]; - * if (prolog & 2) { ptr2[0] = (u16) data0; data0 >>= 16; ptr_out += 2;} - * ptr4 = (s32 *) &ptr_out[0]; - * if (prolog & 4) { ptr4[0] = (u32) data0; data0 >>= 32; ptr_out += 4;} - * - * offset = offset + (prolog & 7); - * if (offset >= 8) { - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * offset = offset & 0x7; - * - * prolog = prolog >> 3; - * if (prolog) for (i=0; i < prolog; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * if(kernel) { kernel -= 1; epilog += 32; } - * if(kernel) for(i=0; i < kernel; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * dataF8 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = *ptr8_in++; - * - * data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * dataF8 = *ptr8_in++; - * } - * epilogdws = epilog >> 3; - * if (epilogdws) for (i=0; i < epilogdws; i++) { - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8; - * data70 = dataF8; - * dataF8 = *ptr8_in++; - * } - * data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset); - * - * ptr4 = (s32 *) &ptr_out[0]; - * if (epilog & 4) { ptr4[0] = (u32) data0; data0 >>= 32; ptr_out += 4;} - * ptr2 = (s16 *) &ptr_out[0]; - * if (epilog & 2) { ptr2[0] = (u16) data0; data0 >>= 16; ptr_out += 2;} - * if (epilog & 1) { *ptr_out++ = (u8) data0; } - * - * return(ptr_out - length); - * } - * - * Codesize : 784 bytes - */ - - -#define ptr_out R0 /* destination pounter */ -#define ptr_in R1 /* source pointer */ -#define len R2 /* length of copy in bytes */ - -#define data70 R13:12 /* lo 8 bytes of non-aligned transfer */ -#define dataF8 R11:10 /* hi 8 bytes of non-aligned transfer */ -#define ldata0 R7:6 /* even 8 bytes chunks */ -#define ldata1 R25:24 /* odd 8 bytes chunks */ -#define data1 R7 /* lower 8 bytes of ldata1 */ -#define 
data0 R6 /* lower 8 bytes of ldata0 */ - -#define ifbyte p0 /* if transfer has bytes in epilog/prolog */ -#define ifhword p0 /* if transfer has shorts in epilog/prolog */ -#define ifword p0 /* if transfer has words in epilog/prolog */ -#define noprolog p0 /* no prolog, xfer starts at 32byte */ -#define nokernel p1 /* no 32byte multiple block in the transfer */ -#define noepilog p0 /* no epilog, xfer ends on 32byte boundary */ -#define align p2 /* alignment of input rel to 8byte boundary */ -#define kernel1 p0 /* kernel count == 1 */ - -#define dalign R25 /* rel alignment of input to output data */ -#define star3 R16 /* number bytes in prolog - dwords */ -#define rest R8 /* length - prolog bytes */ -#define back R7 /* nr bytes > dword boundary in src block */ -#define epilog R3 /* bytes in epilog */ -#define inc R15:14 /* inc kernel by -1 and defetch ptr by 32 */ -#define kernel R4 /* number of 32byte chunks in kernel */ -#define ptr_in_p_128 R5 /* pointer for prefetch of input data */ -#define mask R8 /* mask used to determine prolog size */ -#define shift R8 /* used to work a shifter to extract bytes */ -#define shift2 R5 /* in epilog to workshifter to extract bytes */ -#define prolog R15 /* bytes in prolog */ -#define epilogdws R15 /* number dwords in epilog */ -#define shiftb R14 /* used to extract bytes */ -#define offset R9 /* same as align in reg */ -#define ptr_out_p_32 R17 /* pointer to output dczero */ -#define align888 R14 /* if simple dword loop can be used */ -#define len8 R9 /* number of dwords in length */ -#define over R20 /* nr of bytes > last inp buf dword boundary */ - -#define ptr_in_p_128kernel R5:4 /* packed fetch pointer & kernel cnt */ - - .section .text - .p2align 4 - .global memcpy - .type memcpy, @function -memcpy: -{ - p2 = cmp.eq(len, #0); /* =0 */ - align888 = or(ptr_in, ptr_out); /* %8 < 97 */ - p0 = cmp.gtu(len, #23); /* %1, <24 */ - p1 = cmp.eq(ptr_in, ptr_out); /* attempt to overwrite self */ -} -{ - p1 = or(p2, p1); - p3 = cmp.gtu(len, #95); /* %8 < 97 */ - align888 = or(align888, len); /* %8 < 97 */ - len8 = lsr(len, #3); /* %8 < 97 */ -} -{ - dcfetch(ptr_in); /* zero/ptrin=ptrout causes fetch */ - p2 = bitsclr(align888, #7); /* %8 < 97 */ - if(p1) jumpr r31; /* =0 */ -} -{ - p2 = and(p2,!p3); /* %8 < 97 */ - if (p2.new) len = add(len, #-8); /* %8 < 97 */ - if (p2.new) jump:NT .Ldwordaligned; /* %8 < 97 */ -} -{ - if(!p0) jump .Lbytes23orless; /* %1, <24 */ - mask.l = #LO(0x7fffffff); - /* all bytes before line multiples of data */ - prolog = sub(#0, ptr_out); -} -{ - /* save r31 on stack, decrement sp by 16 */ - allocframe(#24); - mask.h = #HI(0x7fffffff); - ptr_in_p_128 = add(ptr_in, #32); - back = cl0(len); -} -{ - memd(sp+#0) = R17:16; /* save r16,r17 on stack6 */ - r31.l = #LO(.Lmemcpy_return); /* set up final return pointer */ - prolog &= lsr(mask, back); - offset = and(ptr_in, #7); -} -{ - memd(sp+#8) = R25:24; /* save r25,r24 on stack */ - dalign = sub(ptr_out, ptr_in); - r31.h = #HI(.Lmemcpy_return); /* set up final return pointer */ -} -{ - /* see if there if input buffer end if aligned */ - over = add(len, ptr_in); - back = add(len, offset); - memd(sp+#16) = R21:20; /* save r20,r21 on stack */ -} -{ - noprolog = bitsclr(prolog, #7); - prolog = and(prolog, #31); - dcfetch(ptr_in_p_128); - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - kernel = sub(len, prolog); - shift = asl(prolog, #3); - star3 = and(prolog, #7); - ptr_in = and(ptr_in, #-8); -} -{ - prolog = lsr(prolog, #3); - epilog = and(kernel, #31); - ptr_out_p_32 = add(ptr_out, prolog); - 
over = and(over, #7); -} -{ - p3 = cmp.gtu(back, #8); - kernel = lsr(kernel, #5); - dcfetch(ptr_in_p_128); - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - p1 = cmp.eq(prolog, #0); - if(!p1.new) prolog = add(prolog, #1); - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); -} -{ - nokernel = cmp.eq(kernel,#0); - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); - shiftb = and(shift, #8); -} -{ - dcfetch(ptr_in_p_128); /* reserve the line 64bytes on */ - ptr_in_p_128 = add(ptr_in_p_128, #32); - if(nokernel) jump .Lskip64; - p2 = cmp.eq(kernel, #1); /* skip ovr if kernel == 0 */ -} -{ - dczeroa(ptr_out_p_32); - /* don't advance pointer */ - if(!p2) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dalign = and(dalign, #31); - dczeroa(ptr_out_p_32); -} -.Lskip64: -{ - data70 = memd(ptr_in++#16); - if(p3) dataF8 = memd(ptr_in+#8); - if(noprolog) jump .Lnoprolog32; - align = offset; -} -/* upto initial 7 bytes */ -{ - ldata0 = valignb(dataF8, data70, align); - ifbyte = tstbit(shift,#3); - offset = add(offset, star3); -} -{ - if(ifbyte) memb(ptr_out++#1) = data0; - ldata0 = lsr(ldata0, shiftb); - shiftb = and(shift, #16); - ifhword = tstbit(shift,#4); -} -{ - if(ifhword) memh(ptr_out++#2) = data0; - ldata0 = lsr(ldata0, shiftb); - ifword = tstbit(shift,#5); - p2 = cmp.gtu(offset, #7); -} -{ - if(ifword) memw(ptr_out++#4) = data0; - if(p2) data70 = dataF8; - if(p2) dataF8 = memd(ptr_in++#8); /* another 8 bytes */ - align = offset; -} -.Lnoprolog32: -{ - p3 = sp1loop0(.Ldword_loop_prolog, prolog) - rest = sub(len, star3); /* whats left after the loop */ - p0 = cmp.gt(over, #0); -} - if(p0) rest = add(rest, #16); -.Ldword_loop_prolog: -{ - if(p3) memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - p0 = cmp.gt(rest, #16); -} -{ - data70 = dataF8; - if(p0) dataF8 = memd(ptr_in++#8); - rest = add(rest, #-8); -}:endloop0 -.Lkernel: -{ - /* kernel is at least 32bytes */ - p3 = cmp.gtu(kernel, #0); - /* last itn. 
remove edge effects */ - if(p3.new) kernel = add(kernel, #-1); - /* dealt with in last dword loop */ - if(p3.new) epilog = add(epilog, #32); -} -{ - nokernel = cmp.eq(kernel, #0); /* after adjustment, recheck */ - if(nokernel.new) jump:NT .Lepilog; /* likely not taken */ - inc = combine(#32, #-1); - p3 = cmp.gtu(dalign, #24); -} -{ - if(p3) jump .Lodd_alignment; -} -{ - loop0(.Loword_loop_25to31, kernel); - kernel1 = cmp.gtu(kernel, #1); - rest = kernel; -} - .falign -.Loword_loop_25to31: -{ - dcfetch(ptr_in_p_128); /* prefetch 4 lines ahead */ - if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dczeroa(ptr_out_p_32); /* reserve the next 32bytes in cache */ - p3 = cmp.eq(kernel, rest); -} -{ - /* kernel -= 1 */ - ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); - /* kill write on first iteration */ - if(!p3) memd(ptr_out++#8) = ldata1; - ldata1 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata1; - ldata1 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); - kernel1 = cmp.gtu(kernel, #1); -}:endloop0 -{ - memd(ptr_out++#8) = ldata1; - jump .Lepilog; -} -.Lodd_alignment: -{ - loop0(.Loword_loop_00to24, kernel); - kernel1 = cmp.gtu(kernel, #1); - rest = add(kernel, #-1); -} - .falign -.Loword_loop_00to24: -{ - dcfetch(ptr_in_p_128); /* prefetch 4 lines ahead */ - ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc); - if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32); -} -{ - dczeroa(ptr_out_p_32); /* reserve the next 32bytes in cache */ -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - data70 = memd(ptr_in++#8); -} -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(data70, dataF8, align); - dataF8 = memd(ptr_in++#8); - kernel1 = cmp.gtu(kernel, #1); -}:endloop0 -.Lepilog: -{ - noepilog = cmp.eq(epilog,#0); - epilogdws = lsr(epilog, #3); - kernel = and(epilog, #7); -} -{ - if(noepilog) jumpr r31; - if(noepilog) ptr_out = sub(ptr_out, len); - p3 = cmp.eq(epilogdws, #0); - shift2 = asl(epilog, #3); -} -{ - shiftb = and(shift2, #32); - ifword = tstbit(epilog,#2); - if(p3) jump .Lepilog60; - if(!p3) epilog = add(epilog, #-16); -} -{ - loop0(.Ldword_loop_epilog, epilogdws); - /* stop criteria is lsbs unless = 0 then its 8 */ - p3 = cmp.eq(kernel, #0); - if(p3.new) kernel= #8; - p1 = cmp.gt(over, #0); -} - /* if not aligned to end of buffer execute 1 more iteration */ - if(p1) kernel= #0; -.Ldword_loop_epilog: -{ - memd(ptr_out++#8) = ldata0; - ldata0 = valignb(dataF8, data70, align); - p3 = cmp.gt(epilog, kernel); -} -{ - data70 = dataF8; - if(p3) dataF8 = memd(ptr_in++#8); - epilog = add(epilog, #-8); -}:endloop0 -/* copy last 7 bytes */ -.Lepilog60: -{ - if(ifword) memw(ptr_out++#4) = data0; - ldata0 = lsr(ldata0, shiftb); - ifhword = tstbit(epilog,#1); - shiftb = and(shift2, #16); -} -{ - if(ifhword) memh(ptr_out++#2) = data0; - ldata0 = lsr(ldata0, shiftb); - ifbyte = tstbit(epilog,#0); - if(ifbyte.new) len = add(len, #-1); -} -{ - if(ifbyte) memb(ptr_out) = data0; - ptr_out = sub(ptr_out, len); /* return dest pointer */ - jumpr r31; -} -/* do byte copy for small n */ 
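Throughout these loops, valignb(hi, lo, off) does the heavy lifting for misaligned input: it extracts the eight bytes starting off bytes into the 16-byte pair lo:hi, i.e. a byte-granular 64-bit funnel shift. Reference semantics in C (little-endian, as Hexagon is; a sketch, not the kernel's code):

    #include <stdint.h>

    static inline uint64_t valignb(uint64_t hi, uint64_t lo, unsigned off)
    {
        off &= 7;
        if (off == 0)
            return lo;               /* avoid an undefined 64-bit shift */
        return (lo >> (8 * off)) | (hi << (8 * (8 - off)));
    }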
-.Lbytes23orless: -{ - p3 = sp1loop0(.Lbyte_copy, len); - len = add(len, #-1); -} -.Lbyte_copy: -{ - data0 = memb(ptr_in++#1); - if(p3) memb(ptr_out++#1) = data0; -}:endloop0 -{ - memb(ptr_out) = data0; - ptr_out = sub(ptr_out, len); - jumpr r31; -} -/* do dword copies for aligned in, out and length */ -.Ldwordaligned: -{ - p3 = sp1loop0(.Ldword_copy, len8); -} -.Ldword_copy: -{ - if(p3) memd(ptr_out++#8) = ldata0; - ldata0 = memd(ptr_in++#8); -}:endloop0 -{ - memd(ptr_out) = ldata0; - ptr_out = sub(ptr_out, len); - jumpr r31; /* return to function caller */ -} -.Lmemcpy_return: - r21:20 = memd(sp+#16); /* restore r20+r21 */ -{ - r25:24 = memd(sp+#8); /* restore r24+r25 */ - r17:16 = memd(sp+#0); /* restore r16+r17 */ -} - deallocframe; /* restore r31 and incrment stack by 16 */ - jumpr r31 diff --git a/arch/hexagon/lib/memset.S b/arch/hexagon/lib/memset.S deleted file mode 100644 index e67304e3f7cfc6372a6642db251d1ebb3e193729..0000000000000000000000000000000000000000 --- a/arch/hexagon/lib/memset.S +++ /dev/null @@ -1,302 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2011, The Linux Foundation. All rights reserved. - */ - - -/* HEXAGON assembly optimized memset */ -/* Replaces the standard library function memset */ - - - .macro HEXAGON_OPT_FUNC_BEGIN name - .text - .p2align 4 - .globl \name - .type \name, @function -\name: - .endm - - .macro HEXAGON_OPT_FUNC_FINISH name - .size \name, . - \name - .endm - -/* FUNCTION: memset (v2 version) */ -#if __HEXAGON_ARCH__ < 3 -HEXAGON_OPT_FUNC_BEGIN memset - { - r6 = #8 - r7 = extractu(r0, #3 , #0) - p0 = cmp.eq(r2, #0) - p1 = cmp.gtu(r2, #7) - } - { - r4 = vsplatb(r1) - r8 = r0 /* leave r0 intact for return val */ - r9 = sub(r6, r7) /* bytes until double alignment */ - if p0 jumpr r31 /* count == 0, so return */ - } - { - r3 = #0 - r7 = #0 - p0 = tstbit(r9, #0) - if p1 jump 2f /* skip byte loop */ - } - -/* less than 8 bytes to set, so just set a byte at a time and return */ - - loop0(1f, r2) /* byte loop */ - .falign -1: /* byte loop */ - { - memb(r8++#1) = r4 - }:endloop0 - jumpr r31 - .falign -2: /* skip byte loop */ - { - r6 = #1 - p0 = tstbit(r9, #1) - p1 = cmp.eq(r2, #1) - if !p0 jump 3f /* skip initial byte store */ - } - { - memb(r8++#1) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -3: /* skip initial byte store */ - { - r6 = #2 - p0 = tstbit(r9, #2) - p1 = cmp.eq(r2, #2) - if !p0 jump 4f /* skip initial half store */ - } - { - memh(r8++#2) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -4: /* skip initial half store */ - { - r6 = #4 - p0 = cmp.gtu(r2, #7) - p1 = cmp.eq(r2, #4) - if !p0 jump 5f /* skip initial word store */ - } - { - memw(r8++#4) = r4 - r3:2 = sub(r3:2, r7:6) - p0 = cmp.gtu(r2, #11) - if p1 jumpr r31 - } - .falign -5: /* skip initial word store */ - { - r10 = lsr(r2, #3) - p1 = cmp.eq(r3, #1) - if !p0 jump 7f /* skip double loop */ - } - { - r5 = r4 - r6 = #8 - loop0(6f, r10) /* double loop */ - } - -/* set bytes a double word at a time */ - - .falign -6: /* double loop */ - { - memd(r8++#8) = r5:4 - r3:2 = sub(r3:2, r7:6) - p1 = cmp.eq(r2, #8) - }:endloop0 - .falign -7: /* skip double loop */ - { - p0 = tstbit(r2, #2) - if p1 jumpr r31 - } - { - r6 = #4 - p0 = tstbit(r2, #1) - p1 = cmp.eq(r2, #4) - if !p0 jump 8f /* skip final word store */ - } - { - memw(r8++#4) = r4 - r3:2 = sub(r3:2, r7:6) - if p1 jumpr r31 - } - .falign -8: /* skip final word store */ - { - p1 = cmp.eq(r2, #2) - if !p0 jump 9f /* skip final half store */ - } - { - memh(r8++#2) = r4 - if p1 
jumpr r31 - } - .falign -9: /* skip final half store */ - { - memb(r8++#1) = r4 - jumpr r31 - } -HEXAGON_OPT_FUNC_FINISH memset -#endif - - -/* FUNCTION: memset (v3 and higher version) */ -#if __HEXAGON_ARCH__ >= 3 -HEXAGON_OPT_FUNC_BEGIN memset - { - r7=vsplatb(r1) - r6 = r0 - if (r2==#0) jump:nt .L1 - } - { - r5:4=combine(r7,r7) - p0 = cmp.gtu(r2,#8) - if (p0.new) jump:nt .L3 - } - { - r3 = r0 - loop0(.L47,r2) - } - .falign -.L47: - { - memb(r3++#1) = r1 - }:endloop0 /* start=.L47 */ - jumpr r31 -.L3: - { - p0 = tstbit(r0,#0) - if (!p0.new) jump:nt .L8 - p1 = cmp.eq(r2, #1) - } - { - r6 = add(r0, #1) - r2 = add(r2,#-1) - memb(r0) = r1 - if (p1) jump .L1 - } -.L8: - { - p0 = tstbit(r6,#1) - if (!p0.new) jump:nt .L10 - } - { - r2 = add(r2,#-2) - memh(r6++#2) = r7 - p0 = cmp.eq(r2, #2) - if (p0.new) jump:nt .L1 - } -.L10: - { - p0 = tstbit(r6,#2) - if (!p0.new) jump:nt .L12 - } - { - r2 = add(r2,#-4) - memw(r6++#4) = r7 - p0 = cmp.eq(r2, #4) - if (p0.new) jump:nt .L1 - } -.L12: - { - p0 = cmp.gtu(r2,#127) - if (!p0.new) jump:nt .L14 - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } - r3 = and(r6,#31) - if (r3==#0) jump:nt .L17 - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - } -.L17: - { - r3 = lsr(r2,#5) - if (r1!=#0) jump:nt .L18 - } - { - r8 = r3 - r3 = r6 - loop0(.L46,r3) - } - .falign -.L46: - { - dczeroa(r6) - r6 = add(r6,#32) - r2 = add(r2,#-32) - }:endloop0 /* start=.L46 */ -.L14: - { - p0 = cmp.gtu(r2,#7) - if (!p0.new) jump:nt .L28 - r8 = lsr(r2,#3) - } - loop0(.L44,r8) - .falign -.L44: - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-8) - }:endloop0 /* start=.L44 */ -.L28: - { - p0 = tstbit(r2,#2) - if (!p0.new) jump:nt .L33 - } - { - r2 = add(r2,#-4) - memw(r6++#4) = r7 - } -.L33: - { - p0 = tstbit(r2,#1) - if (!p0.new) jump:nt .L35 - } - { - r2 = add(r2,#-2) - memh(r6++#2) = r7 - } -.L35: - p0 = cmp.eq(r2,#1) - if (p0) memb(r6) = r1 -.L1: - jumpr r31 -.L18: - loop0(.L45,r3) - .falign -.L45: - dczeroa(r6) - { - memd(r6++#8) = r5:4 - r2 = add(r2,#-32) - } - memd(r6++#8) = r5:4 - memd(r6++#8) = r5:4 - { - memd(r6++#8) = r5:4 - }:endloop0 /* start=.L45 */ - jump .L14 -HEXAGON_OPT_FUNC_FINISH memset -#endif diff --git a/arch/hexagon/mm/copy_from_user.S b/arch/hexagon/mm/copy_from_user.S deleted file mode 100644 index 1a49bf24f68bc1104ff2610020ae1d19d5d251ef..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_from_user.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User memory copy functions for kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* - * The right way to do this involves valignb - * The easy way to do this is only speed up src/dest similar alignment. - */ - -/* - * Copy to/from user are the same, except that for packets with a load and - * a store, I don't know how to tell which kind of exception we got. - * Therefore, we duplicate the function, and handle faulting addresses - * differently for each function - */ - -/* - * copy from user: loads can fault - */ -#define src_sav r13 -#define dst_sav r12 -#define src_dst_sav r13:12 -#define d_dbuf r15:14 -#define w_dbuf r15 - -#define dst r0 -#define src r1 -#define bytes r2 -#define loopcount r5 - -#define FUNCNAME raw_copy_from_user -#include "copy_user_template.S" - - /* LOAD FAULTS from COPY_FROM_USER */ - - /* Alignment loop. r2 has been updated. Return it. 
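The fault fixups that follow all compute the return value required of raw_copy_from_user(): the number of bytes not copied. src_sav (r13) holds the source address the in-flight chunk was loaded from, and the identity noted in the code, X - (A - B) == X + B - A, turns the adjustment into a single add, r2 += sub(src_sav, src). In C terms (a sketch):

    /* remaining: running byte count at fault time (r2)
     * src:       post-increment source pointer (r1)
     * src_sav:   source address of the pending chunk (r13) */
    static unsigned long not_copied(unsigned long remaining,
                                    const char *src, const char *src_sav)
    {
        return remaining + (src_sav - src);  /* == remaining - (src - src_sav) */
    }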
*/ - .falign -1009: -2009: -4009: - { - r0 = r2 - jumpr r31 - } - /* Normal copy loops. Do epilog. Use src-src_sav to compute distance */ - /* X - (A - B) == X + B - A */ - .falign -8089: - { - memd(dst) = d_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -4089: - { - memw(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -2089: - { - memh(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - .falign -1089: - { - memb(dst) = w_dbuf - r2 += sub(src_sav,src) - } - { - r0 = r2 - jumpr r31 - } - - /* COPY FROM USER: only loads can fail */ - - .section __ex_table,"a" - .long 1000b,1009b - .long 2000b,2009b - .long 4000b,4009b - .long 8080b,8089b - .long 4080b,4089b - .long 2080b,2089b - .long 1080b,1089b - .previous diff --git a/arch/hexagon/mm/copy_to_user.S b/arch/hexagon/mm/copy_to_user.S deleted file mode 100644 index ed8e3cafb36e4219f9f6caa0768dbc21d11e282e..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_to_user.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User memory copying routines for the Hexagon Kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -/* The right way to do this involves valignb - * The easy way to do this is only speed up src/dest similar alignment. - */ - -/* - * Copy to/from user are the same, except that for packets with a load and - * a store, I don't know how to tell which kind of exception we got. - * Therefore, we duplicate the function, and handle faulting addresses - * differently for each function - */ - -/* - * copy to user: stores can fault - */ -#define src_sav r13 -#define dst_sav r12 -#define src_dst_sav r13:12 -#define d_dbuf r15:14 -#define w_dbuf r15 - -#define dst r0 -#define src r1 -#define bytes r2 -#define loopcount r5 - -#define FUNCNAME raw_copy_to_user -#include "copy_user_template.S" - - /* STORE FAULTS from COPY_TO_USER */ - .falign -1109: -2109: -4109: - /* Alignment loop. r2 has been updated. Return it. */ - { - r0 = r2 - jumpr r31 - } - /* Normal copy loops. Use dst-dst_sav to compute distance */ - /* dst holds best write, no need to unwind any loops */ - /* X - (A - B) == X + B - A */ - .falign -8189: -8199: -4189: -4199: -2189: -2199: -1189: -1199: - { - r2 += sub(dst_sav,dst) - } - { - r0 = r2 - jumpr r31 - } - - /* COPY TO USER: only stores can fail */ - .section __ex_table,"a" - .long 1100b,1109b - .long 2100b,2109b - .long 4100b,4109b - .long 8180b,8189b - .long 8190b,8199b - .long 4180b,4189b - .long 4190b,4199b - .long 2180b,2189b - .long 2190b,2199b - .long 1180b,1189b - .long 1190b,1199b - .previous diff --git a/arch/hexagon/mm/copy_user_template.S b/arch/hexagon/mm/copy_user_template.S deleted file mode 100644 index d297df01b43f941fe28684b500332bc1dbe7cca8..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/copy_user_template.S +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. 
- */ - -/* Numerology: - * WXYZ - * W: width in bytes - * X: Load=0, Store=1 - * Y: Location 0=preamble,8=loop,9=epilog - * Z: Location=0,handler=9 - */ - .text - .global FUNCNAME - .type FUNCNAME, @function - .p2align 5 -FUNCNAME: - { - p0 = cmp.gtu(bytes,#0) - if (!p0.new) jump:nt .Ldone - r3 = or(dst,src) - r4 = xor(dst,src) - } - { - p1 = cmp.gtu(bytes,#15) - p0 = bitsclr(r3,#7) - if (!p0.new) jump:nt .Loop_not_aligned_8 - src_dst_sav = combine(src,dst) - } - - { - loopcount = lsr(bytes,#3) - if (!p1) jump .Lsmall - } - p3=sp1loop0(.Loop8,loopcount) -.Loop8: -8080: -8180: - { - if (p3) memd(dst++#8) = d_dbuf - d_dbuf = memd(src++#8) - }:endloop0 -8190: - { - memd(dst++#8) = d_dbuf - bytes -= asl(loopcount,#3) - jump .Lsmall - } - -.Loop_not_aligned_8: - { - p0 = bitsclr(r4,#7) - if (p0.new) jump:nt .Lalign - } - { - p0 = bitsclr(r3,#3) - if (!p0.new) jump:nt .Loop_not_aligned_4 - p1 = cmp.gtu(bytes,#7) - } - - { - if (!p1) jump .Lsmall - loopcount = lsr(bytes,#2) - } - p3=sp1loop0(.Loop4,loopcount) -.Loop4: -4080: -4180: - { - if (p3) memw(dst++#4) = w_dbuf - w_dbuf = memw(src++#4) - }:endloop0 -4190: - { - memw(dst++#4) = w_dbuf - bytes -= asl(loopcount,#2) - jump .Lsmall - } - -.Loop_not_aligned_4: - { - p0 = bitsclr(r3,#1) - if (!p0.new) jump:nt .Loop_not_aligned - p1 = cmp.gtu(bytes,#3) - } - - { - if (!p1) jump .Lsmall - loopcount = lsr(bytes,#1) - } - p3=sp1loop0(.Loop2,loopcount) -.Loop2: -2080: -2180: - { - if (p3) memh(dst++#2) = w_dbuf - w_dbuf = memuh(src++#2) - }:endloop0 -2190: - { - memh(dst++#2) = w_dbuf - bytes -= asl(loopcount,#1) - jump .Lsmall - } - -.Loop_not_aligned: /* Works for as small as one byte */ - p3=sp1loop0(.Loop1,bytes) -.Loop1: -1080: -1180: - { - if (p3) memb(dst++#1) = w_dbuf - w_dbuf = memub(src++#1) - }:endloop0 - /* Done */ -1190: - { - memb(dst) = w_dbuf - jumpr r31 - r0 = #0 - } - -.Lsmall: - { - p0 = cmp.gtu(bytes,#0) - if (p0.new) jump:nt .Loop_not_aligned - } -.Ldone: - { - r0 = #0 - jumpr r31 - } - .falign -.Lalign: -1000: - { - if (p0.new) w_dbuf = memub(src) - p0 = tstbit(src,#0) - if (!p1) jump .Lsmall - } -1100: - { - if (p0) memb(dst++#1) = w_dbuf - if (p0) bytes = add(bytes,#-1) - if (p0) src = add(src,#1) - } -2000: - { - if (p0.new) w_dbuf = memuh(src) - p0 = tstbit(src,#1) - if (!p1) jump .Lsmall - } -2100: - { - if (p0) memh(dst++#2) = w_dbuf - if (p0) bytes = add(bytes,#-2) - if (p0) src = add(src,#2) - } -4000: - { - if (p0.new) w_dbuf = memw(src) - p0 = tstbit(src,#2) - if (!p1) jump .Lsmall - } -4100: - { - if (p0) memw(dst++#4) = w_dbuf - if (p0) bytes = add(bytes,#-4) - if (p0) src = add(src,#4) - jump FUNCNAME - } - .size FUNCNAME,.-FUNCNAME diff --git a/arch/hexagon/mm/strnlen_user.S b/arch/hexagon/mm/strnlen_user.S deleted file mode 100644 index 4b5574a7cc9cfac19eb953606db48147fae10e8c..0000000000000000000000000000000000000000 --- a/arch/hexagon/mm/strnlen_user.S +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * User string length functions for kernel - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#define isrc r0 -#define max r1 /* Do not change! */ - -#define end r2 -#define tmp1 r3 - -#define obo r6 /* off-by-one */ -#define start r7 -#define mod8 r8 -#define dbuf r15:14 -#define dcmp r13:12 - -/* - * The vector mask version of this turned out *really* badly. - * The hardware loop version also turned out *really* badly. - * Seems straight pointer arithmetic basically wins here. 
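The doubleword loop below finds a NUL without touching bytes one at a time: vcmpb.eq compares all eight bytes of dbuf against zero at once, the predicate is moved into a scalar register, and ct0 (count trailing zeros) yields the index of the first matching byte, with 32 meaning no match in this word. Equivalent C, as a sketch using a GCC builtin:

    #include <stdint.h>

    /* Returns the byte index of the first NUL in 'word', or -1 if
     * there is none (the assembly's ct0 yields 32 in that case). */
    static int first_nul_byte(uint64_t word)
    {
        unsigned mask = 0;
        for (int i = 0; i < 8; i++)              /* vcmpb.eq(dbuf, #0) */
            if (((word >> (8 * i)) & 0xff) == 0)
                mask |= 1u << i;
        return mask ? __builtin_ctz(mask) : -1;  /* ct0 of the mask */
    }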
- */ - -#define fname __strnlen_user - - .text - .global fname - .type fname, @function - .p2align 5 /* why? */ -fname: - { - mod8 = and(isrc,#7); - end = add(isrc,max); - start = isrc; - } - { - P0 = cmp.eq(mod8,#0); - mod8 = and(end,#7); - dcmp = #0; - if (P0.new) jump:t dw_loop; /* fire up the oven */ - } - -alignment_loop: -fail_1: { - tmp1 = memb(start++#1); - } - { - P0 = cmp.eq(tmp1,#0); - if (P0.new) jump:nt exit_found; - P1 = cmp.gtu(end,start); - mod8 = and(start,#7); - } - { - if (!P1) jump exit_error; /* hit the end */ - P0 = cmp.eq(mod8,#0); - } - { - if (!P0) jump alignment_loop; - } - - - -dw_loop: -fail_2: { - dbuf = memd(start); - obo = add(start,#1); - } - { - P0 = vcmpb.eq(dbuf,dcmp); - } - { - tmp1 = P0; - P0 = cmp.gtu(end,start); - } - { - tmp1 = ct0(tmp1); - mod8 = and(end,#7); - if (!P0) jump end_check; - } - { - P0 = cmp.eq(tmp1,#32); - if (!P0.new) jump:nt exit_found; - if (!P0.new) start = add(obo,tmp1); - } - { - start = add(start,#8); - jump dw_loop; - } /* might be nice to combine these jumps... */ - - -end_check: - { - P0 = cmp.gt(tmp1,mod8); - if (P0.new) jump:nt exit_error; /* neverfound! */ - start = add(obo,tmp1); - } - -exit_found: - { - R0 = sub(start,isrc); - jumpr R31; - } - -exit_error: - { - R0 = add(max,#1); - jumpr R31; - } - - /* Uh, what does the "fixup" return here? */ - .falign -fix_1: - { - R0 = #0; - jumpr R31; - } - - .size fname,.-fname - - -.section __ex_table,"a" -.long fail_1,fix_1 -.long fail_2,fix_1 -.previous diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S deleted file mode 100644 index 58233bb7976dfa46651f80ef2aaa67df9f8627b4..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/efi_stub.S +++ /dev/null @@ -1,87 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * EFI call stub. - * - * Copyright (C) 1999-2001 Hewlett-Packard Co - * David Mosberger - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. We need this because we can't call SetVirtualMap() until - * the kernel has booted far enough to allow allocation of struct vma_struct - * entries (which we would need to map stuff with memory attributes other - * than uncached or writeback...). Since the GetTime() service gets called - * earlier than that, we need to be able to make physical mode EFI calls from - * the kernel. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. 
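efi_call_phys (below) flips the processor into physical mode by rewriting the PSR in two steps: OR in the bits that must be set, then clear the must-be-clear set with andcm (and-complement), the exact or/andcm pair visible in the code. The same arithmetic in C, with made-up bit values in place of the IA64_PSR_* constants:

    #include <stdint.h>

    #define PSR_SET_BITS    (1ull << 44)                  /* illustrative */
    #define PSR_CLEAR_BITS  ((1ull << 13) | (1ull << 36)) /* illustrative */

    static inline uint64_t phys_mode_psr(uint64_t psr)
    {
        psr |= PSR_SET_BITS;         /* or    loc3 = loc3, r17 */
        psr &= ~PSR_CLEAR_BITS;      /* andcm r16  = loc3, r16 */
        return psr;
    }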
- */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of EFI routine to call - * in1..in7 = arguments to routine - * - * Outputs: - * r8 = EFI_STATUS returned by called function - */ - -GLOBAL_ENTRY(efi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,7,7,0 - ld8 r2=[in0],8 // load EFI function's entry point - mov loc0=rp - .body - ;; - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load EFI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov out4=in5 - mov out0=in1 - mov out1=in2 - mov out2=in3 - mov out3=in4 - mov out5=in6 - mov out6=in7 - mov loc5=r19 - mov loc6=r20 - br.call.sptk.many rp=b6 // call the EFI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 - mov r19=loc5 - mov r20=loc6 - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(efi_call_phys) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S deleted file mode 100644 index a9992be5718b88a9e7e31e00db188fb03baeadfe..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/entry.S +++ /dev/null @@ -1,1435 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/entry.S - * - * Kernel entry points. - * - * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Copyright (C) 1999, 2002-2003 - * Asit Mallick - * Don Dugger - * Suresh Siddha - * Fenghua Yu - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - */ -/* - * ia64_switch_to now places correct virtual mapping in in TR2 for - * kernel stack. This allows us to handle interrupts without changing - * to physical mode. - * - * Jonathan Nicklin - * Patrick O'Rourke - * 11/07/2000 - */ -/* - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - */ -/* - * Global (preserved) predicate usage on syscall entry/exit path: - * - * pKStk: See entry.h. - * pUStk: See entry.h. - * pSys: See entry.h. - * pNonSys: !pSys - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minstate.h" - - /* - * execve() is special because in case of success, we need to - * setup a null register window frame. 
- */ -ENTRY(ia64_execve) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 - mov loc0=rp - .body - mov out0=in0 // filename - ;; // stop bit between alloc and call - mov out1=in1 // argv - mov out2=in2 // envp - br.call.sptk.many rp=sys_execve -.ret0: - cmp4.ge p6,p7=r8,r0 - mov ar.pfs=loc1 // restore ar.pfs - sxt4 r8=r8 // return 64-bit result - ;; - stf.spill [sp]=f0 - mov rp=loc0 -(p6) mov ar.pfs=r0 // clear ar.pfs on success -(p7) br.ret.sptk.many rp - - /* - * In theory, we'd have to zap this state only to prevent leaking of - * security sensitive state (e.g., if current->mm->dumpable is zero). However, - * this executes in less than 20 cycles even on Itanium, so it's not worth - * optimizing for...). - */ - mov ar.unat=0; mov ar.lc=0 - mov r4=0; mov f2=f0; mov b1=r0 - mov r5=0; mov f3=f0; mov b2=r0 - mov r6=0; mov f4=f0; mov b3=r0 - mov r7=0; mov f5=f0; mov b4=r0 - ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 - ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 - ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 - ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 - ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 - ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 - ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 - br.ret.sptk.many rp -END(ia64_execve) - -/* - * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, - * u64 tls) - */ -GLOBAL_ENTRY(sys_clone2) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out2=in2 - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out3=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() - mov out4=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret1: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone2) - -/* - * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) - * Deprecated. Use sys_clone2() instead. - */ -GLOBAL_ENTRY(sys_clone) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc r16=ar.pfs,8,2,6,0 - DO_SAVE_SWITCH_STACK - adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp - mov loc0=rp - mov loc1=r16 // save ar.pfs across do_fork - .body - mov out1=in1 - mov out2=16 // stacksize (compensates for 16-byte scratch area) - tbit.nz p6,p0=in0,CLONE_SETTLS_BIT - mov out3=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID - ;; -(p6) st8 [r2]=in4 // store TLS in r13 (tp) - mov out4=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID - mov out0=in0 // out0 = clone_flags - br.call.sptk.many rp=do_fork -.ret2: .restore sp - adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(sys_clone) - -/* - * prev_task <- ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. 
The rest of the code - * doesn't care about the interrupt masking status. - */ -GLOBAL_ENTRY(ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - DO_SAVE_SWITCH_STACK - .body - - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - mov r27=IA64_KR(CURRENT_STACK) - adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 - dep r20=0,in0,61,3 // physical address of "next" - ;; - st8 [r22]=sp // save kernel stack pointer of old task - shr.u r26=r20,IA64_GRANULE_SHIFT - cmp.eq p7,p6=r25,in0 - ;; - /* - * If we've already mapped this task's page, we can skip doing it again. - */ -(p6) cmp.eq p7,p6=r26,r27 -(p6) br.cond.dpnt .map - ;; -.done: - ld8 sp=[r21] // load kernel stack pointer of new task - MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context - -.map: - RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here - movl r25=PAGE_KERNEL - ;; - srlz.d - or r23=r25,r20 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; - MOV_TO_ITIR(p0, r25, r8) - MOV_TO_IFA(in0, r8) // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit - br.cond.sptk .done -END(ia64_switch_to) - -/* - * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This - * means that we may get an interrupt with "sp" pointing to the new kernel stack while - * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, - * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a - * problem. Also, we don't need to specify unwind information for preserved registers - * that are not modified in save_switch_stack as the right unwind information is already - * specified at the call-site of save_switch_stack. 
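The SW() offsets used throughout save_switch_stack and load_switch_stack below index into struct switch_stack; a hedged reconstruction of the layout they imply (the authoritative definition lives in asm/ptrace.h) looks roughly like this:

    /* Hedged sketch of the preserved-state layout implied by the SW()
     * offsets below; scratch state lives in struct pt_regs instead. */
    struct switch_stack_sketch {
        unsigned long caller_unat;              /* NaT bits of caller's spills */
        unsigned long ar_fpsr;                  /* FP status register */
        unsigned char f2_f31[30][16];           /* 16-byte spill slots for f2-f31 */
        unsigned long r4, r5, r6, r7;           /* preserved general registers */
        unsigned long b0, b1, b2, b3, b4, b5;   /* branch registers */
        unsigned long ar_pfs;                   /* previous function state */
        unsigned long ar_lc;                    /* loop counter */
        unsigned long ar_unat;                  /* NaT bits for r4-r7 */
        unsigned long ar_rnat;                  /* RSE NaT collection */
        unsigned long ar_bspstore;              /* RSE backing-store pointer */
        unsigned long pr;                       /* predicate registers */
    };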
- */ - -/* - * save_switch_stack: - * - r16 holds ar.pfs - * - b7 holds address to return to - * - rp (b0) holds return address to save - */ -GLOBAL_ENTRY(save_switch_stack) - .prologue - .altrp b7 - flushrs // flush dirty regs to backing store (must be first in insn group) - .save @priunat,r17 - mov r17=ar.unat // preserve caller's - .body -#ifdef CONFIG_ITANIUM - adds r2=16+128,sp - adds r3=16+64,sp - adds r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,16 // spill r4 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl.nt1 [r2],128 - lfetch.fault.excl.nt1 [r3],128 - ;; - lfetch.fault.excl [r2] - lfetch.fault.excl [r3] - adds r15=SW(R5)+16,sp -#else - add r2=16+3*128,sp - add r3=16,sp - add r14=SW(R4)+16,sp - ;; - st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 - ;; - lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 - lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 - ;; - lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 - lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 - adds r15=SW(R5)+16,sp -#endif - ;; - st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 - mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 - add r2=SW(F2)+16,sp // r2 = &sw->f2 - ;; - st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 - mov.m r18=ar.fpsr // preserve fpsr - add r3=SW(F3)+16,sp // r3 = &sw->f3 - ;; - stf.spill [r2]=f2,32 - mov.m r19=ar.rnat - mov r21=b0 - - stf.spill [r3]=f3,32 - st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 - mov r22=b1 - ;; - // since we're done with the spills, read and save ar.unat: - mov.m r29=ar.unat - mov.m r20=ar.bspstore - mov r23=b2 - stf.spill [r2]=f4,32 - stf.spill [r3]=f5,32 - mov r24=b3 - ;; - st8 [r14]=r21,SW(B1)-SW(B0) // save b0 - st8 [r15]=r23,SW(B3)-SW(B2) // save b2 - mov r25=b4 - mov r26=b5 - ;; - st8 [r14]=r22,SW(B4)-SW(B1) // save b1 - st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 - mov r21=ar.lc // I-unit - stf.spill [r2]=f12,32 - stf.spill [r3]=f13,32 - ;; - st8 [r14]=r25,SW(B5)-SW(B4) // save b4 - st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs - stf.spill [r2]=f14,32 - stf.spill [r3]=f15,32 - ;; - st8 [r14]=r26 // save b5 - st8 [r15]=r21 // save ar.lc - stf.spill [r2]=f16,32 - stf.spill [r3]=f17,32 - ;; - stf.spill [r2]=f18,32 - stf.spill [r3]=f19,32 - ;; - stf.spill [r2]=f20,32 - stf.spill [r3]=f21,32 - ;; - stf.spill [r2]=f22,32 - stf.spill [r3]=f23,32 - ;; - stf.spill [r2]=f24,32 - stf.spill [r3]=f25,32 - ;; - stf.spill [r2]=f26,32 - stf.spill [r3]=f27,32 - ;; - stf.spill [r2]=f28,32 - stf.spill [r3]=f29,32 - ;; - stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) - stf.spill [r3]=f31,SW(PR)-SW(F31) - add r14=SW(CALLER_UNAT)+16,sp - ;; - st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat - st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat - mov r21=pr - ;; - st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat - st8 [r3]=r21 // save predicate registers - ;; - st8 [r2]=r20 // save ar.bspstore - st8 [r14]=r18 // save fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(save_switch_stack) - -/* - * load_switch_stack: - * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) - * - b7 holds address to return to - * - must not touch r8-r11 - */ -GLOBAL_ENTRY(load_switch_stack) - .prologue - .altrp b7 - - .body - lfetch.fault.nt1 [sp] - adds r2=SW(AR_BSPSTORE)+16,sp - adds r3=SW(AR_UNAT)+16,sp - mov ar.rsc=0 // put RSE into enforced lazy mode - adds r14=SW(CALLER_UNAT)+16,sp - adds r15=SW(AR_FPSR)+16,sp 
- ;; - ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore - ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat - ;; - ld8 r21=[r2],16 // restore b0 - ld8 r22=[r3],16 // restore b1 - ;; - ld8 r23=[r2],16 // restore b2 - ld8 r24=[r3],16 // restore b3 - ;; - ld8 r25=[r2],16 // restore b4 - ld8 r26=[r3],16 // restore b5 - ;; - ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs - ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc - ;; - ld8 r28=[r2] // restore pr - ld8 r30=[r3] // restore rnat - ;; - ld8 r18=[r14],16 // restore caller's unat - ld8 r19=[r15],24 // restore fpsr - ;; - ldf.fill f2=[r14],32 - ldf.fill f3=[r15],32 - ;; - ldf.fill f4=[r14],32 - ldf.fill f5=[r15],32 - ;; - ldf.fill f12=[r14],32 - ldf.fill f13=[r15],32 - ;; - ldf.fill f14=[r14],32 - ldf.fill f15=[r15],32 - ;; - ldf.fill f16=[r14],32 - ldf.fill f17=[r15],32 - ;; - ldf.fill f18=[r14],32 - ldf.fill f19=[r15],32 - mov b0=r21 - ;; - ldf.fill f20=[r14],32 - ldf.fill f21=[r15],32 - mov b1=r22 - ;; - ldf.fill f22=[r14],32 - ldf.fill f23=[r15],32 - mov b2=r23 - ;; - mov ar.bspstore=r27 - mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 - mov b3=r24 - ;; - ldf.fill f24=[r14],32 - ldf.fill f25=[r15],32 - mov b4=r25 - ;; - ldf.fill f26=[r14],32 - ldf.fill f27=[r15],32 - mov b5=r26 - ;; - ldf.fill f28=[r14],32 - ldf.fill f29=[r15],32 - mov ar.pfs=r16 - ;; - ldf.fill f30=[r14],32 - ldf.fill f31=[r15],24 - mov ar.lc=r17 - ;; - ld8.fill r4=[r14],16 - ld8.fill r5=[r15],16 - mov pr=r28,-1 - ;; - ld8.fill r6=[r14],16 - ld8.fill r7=[r15],16 - - mov ar.unat=r18 // restore caller's unat - mov ar.rnat=r30 // must restore after bspstore but before rsc! - mov ar.fpsr=r19 // restore fpsr - mov ar.rsc=3 // put RSE back into eager mode, pl 0 - br.cond.sptk.many b7 -END(load_switch_stack) - - /* - * Invoke a system call, but do some tracing before and after the call. - * We MUST preserve the current register frame throughout this routine - * because some system calls (such as ia64_execve) directly - * manipulate ar.pfs. - */ -GLOBAL_ENTRY(ia64_trace_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * We need to preserve the scratch registers f6-f11 in case the system - * call is sigreturn. - */ - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args - cmp.lt p6,p0=r8,r0 // check tracehook - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - adds r16=PT(F6)+16,sp - adds r17=PT(F7)+16,sp - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - // the syscall number may have changed, so re-load it and re-calculate the - // syscall entry-point: - adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) - ;; - ld8 r15=[r15] - mov r3=NR_syscalls - 1 - ;; - adds r15=-1024,r15 - movl r16=sys_call_table - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - ;; - mov b6=r20 - br.call.sptk.many rp=b6 // do the syscall -.strace_check_retval: - cmp.lt p6,p0=r8,r0 // syscall failed? 
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 - mov r10=0 -(p6) br.cond.sptk strace_error // syscall failed -> - ;; // avoid RAW on r10 -.strace_save_retval: -.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 -.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -.ret3: -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -(pUStk) rsm psr.i // disable interrupts - br.cond.sptk ia64_work_pending_syscall_end - -strace_error: - ld8 r3=[r2] // load pt_regs.r8 - sub r9=0,r8 // negate return value to get errno value - ;; - cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? - adds r3=16,r2 // r3=&pt_regs.r10 - ;; -(p6) mov r10=-1 -(p6) mov r8=r9 - br.cond.sptk .strace_save_retval -END(ia64_trace_syscall) - - /* - * When traced and returning from sigreturn, we invoke syscall_trace but then - * go straight to ia64_leave_kernel rather than ia64_leave_syscall. - */ -GLOBAL_ENTRY(ia64_strace_leave_kernel) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value -} -.ret4: br.cond.sptk ia64_leave_kernel -END(ia64_strace_leave_kernel) - -ENTRY(call_payload) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0) - /* call the kernel_thread payload; fn is in r4, arg - in r5 */ - alloc loc1=ar.pfs,0,3,1,0 - mov loc0=rp - mov loc2=gp - mov out0=r5 // arg - ld8 r14 = [r4], 8 // fn.address - ;; - mov b6 = r14 - ld8 gp = [r4] // fn.gp - ;; - br.call.sptk.many rp=b6 // fn(arg) -.ret12: mov gp=loc2 - mov rp=loc0 - mov ar.pfs=loc1 - /* ... and if it has returned, we are going to userland */ - cmp.ne pKStk,pUStk=r0,r0 - br.ret.sptk.many rp -END(call_payload) - -GLOBAL_ENTRY(ia64_ret_from_clone) - PT_REGS_UNWIND_INFO(0) -{ /* - * Some versions of gas generate bad unwind info if the first instruction of a - * procedure doesn't go into the first slot of a bundle. This is a workaround. - */ - nop.m 0 - nop.i 0 - /* - * We need to call schedule_tail() to complete the scheduling process. - * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the - * address of the previously executing task. - */ - br.call.sptk.many rp=ia64_invoke_schedule_tail -} -.ret8: -(pKStk) br.call.sptk.many rp=call_payload - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] - ;; - mov r8=0 - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 - ;; - cmp.ne p6,p0=r2,r0 -(p6) br.cond.spnt .strace_check_retval - ;; // added stop bits to prevent r8 dependency -END(ia64_ret_from_clone) - // fall through -GLOBAL_ENTRY(ia64_ret_from_syscall) - PT_REGS_UNWIND_INFO(0) - cmp.ge p6,p7=r8,r0 // syscall executed successfully? - adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 - mov r10=r0 // clear error indication in r10 -(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure -END(ia64_ret_from_syscall) - // fall through - -/* - * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't - * need to switch to bank 0 and doesn't restore the scratch registers. 
- * To avoid leaking kernel bits, the scratch registers are set to - * the following known-to-be-safe values: - * - * r1: restored (global pointer) - * r2: cleared - * r3: 1 (when returning to user-level) - * r8-r11: restored (syscall return value(s)) - * r12: restored (user-level stack pointer) - * r13: restored (user-level thread pointer) - * r14: set to __kernel_syscall_via_epc - * r15: restored (syscall #) - * r16-r17: cleared - * r18: user-level b6 - * r19: cleared - * r20: user-level ar.fpsr - * r21: user-level b0 - * r22: cleared - * r23: user-level ar.bspstore - * r24: user-level ar.rnat - * r25: user-level ar.unat - * r26: user-level ar.pfs - * r27: user-level ar.rsc - * r28: user-level ip - * r29: user-level psr - * r30: user-level cfm - * r31: user-level pr - * f6-f11: cleared - * pr: restored (user-level pr) - * b0: restored (user-level rp) - * b6: restored - * b7: set to __kernel_syscall_via_epc - * ar.unat: restored (user-level ar.unat) - * ar.pfs: restored (user-level ar.pfs) - * ar.rsc: restored (user-level ar.rsc) - * ar.rnat: restored (user-level ar.rnat) - * ar.bspstore: restored (user-level ar.bspstore) - * ar.fpsr: restored (user-level ar.fpsr) - * ar.ccv: cleared - * ar.csd: cleared - * ar.ssd: cleared - */ -GLOBAL_ENTRY(ia64_leave_syscall) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. - */ -#ifdef CONFIG_PREEMPT - RSM_PSR_I(p0, r2, r18) // disable interrupts - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else /* !CONFIG_PREEMPT */ - RSM_PSR_I(pUStk, r2, r18) - cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.global ia64_work_processed_syscall; -ia64_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - adds r2=PT(LOADRS)+16,r12 - MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - adds r3=PT(AR_BSPSTORE)+16,r12 // deferred - ;; -#else - adds r2=PT(LOADRS)+16,r12 - adds r3=PT(AR_BSPSTORE)+16,r12 - adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r18] // load current_thread_info()->flags - ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" - nop.i 0 - ;; -#endif - mov r16=ar.bsp // M2 get existing backing store pointer - ld8 r18=[r2],PT(R9)-PT(B6) // load b6 -(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) -(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? 
-(p6) br.cond.spnt .work_pending_syscall - ;; - // start restoring the state saved on the kernel stack (struct pt_regs): - ld8 r9=[r2],PT(CR_IPSR)-PT(R9) - ld8 r11=[r3],PT(CR_IIP)-PT(R11) -(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! - ;; - invala // M0|1 invalidate ALAT - RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection - cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs - - ld8 r29=[r2],16 // M0|1 load cr.ipsr - ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#else - mov r22=r0 // A clear r22 - ;; - ld8 r30=[r2],16 // M0|1 load cr.ifs - ld8 r25=[r3],16 // M0|1 load ar.unat -(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; -#endif - ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs - MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled - nop 0 - ;; - ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 - ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc - mov f6=f0 // F clear f6 - ;; - ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) - ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates - mov f7=f0 // F clear f7 - ;; - ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr - ld8.fill r1=[r3],16 // M0|1 load r1 -(pUStk) mov r17=1 // A - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) st1 [r15]=r17 // M2|3 -#else -(pUStk) st1 [r14]=r17 // M2|3 -#endif - ld8.fill r13=[r3],16 // M0|1 - mov f8=f0 // F clear f8 - ;; - ld8.fill r12=[r2] // M0|1 restore r12 (sp) - ld8.fill r15=[r3] // M0|1 restore r15 - mov b6=r18 // I0 restore b6 - - LOAD_PHYS_STACK_REG_SIZE(r17) - mov f9=f0 // F clear f9 -(pKStk) br.cond.dpnt.many skip_rbs_switch // B - - srlz.d // M0 ensure interruption collection is off (for cover) - shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - COVER // B add current frame into dirty partition & set cr.ifs - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - mov r19=ar.bsp // M2 get new backing store pointer - st8 [r14]=r22 // M save time at leave - mov f10=f0 // F clear f10 - - mov r22=r0 // A clear r22 - movl r14=__kernel_syscall_via_epc // X - ;; -#else - mov r19=ar.bsp // M2 get new backing store pointer - mov f10=f0 // F clear f10 - - nop.m 0 - movl r14=__kernel_syscall_via_epc // X - ;; -#endif - mov.m ar.csd=r0 // M2 clear ar.csd - mov.m ar.ccv=r0 // M2 clear ar.ccv - mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) - - mov.m ar.ssd=r0 // M2 clear ar.ssd - mov f11=f0 // F clear f11 - br.cond.sptk.many rbs_switch // B -END(ia64_leave_syscall) - -GLOBAL_ENTRY(ia64_leave_kernel) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. mustn't get changed by this CPU before it returns to - * user- or fsys-mode, hence we disable interrupts early on. - * - * p6 controls whether current_thread_info()->flags needs to be check for - * extra work. We always check for extra work when returning to user-level. - * With CONFIG_PREEMPT, we also check for extra work when the preempt_count - * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check - * needs to be redone. 
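The predicate setup in the two prologues (here and in ia64_leave_syscall above) reduces to a simple condition on when the extra-work check runs; a hedged C restatement with illustrative names:

    /* Hedged sketch of the condition computed into p6 above: extra work
     * (resched, signals, ...) is checked when returning to user mode,
     * and also in-kernel under CONFIG_PREEMPT once preempt_count has
     * dropped to zero. */
    static int check_extra_work_sketch(int returning_to_user,
                                       int preempt_configured,
                                       int preempt_count)
    {
        if (returning_to_user)
            return 1;
        return preempt_configured && preempt_count == 0;
    }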
- */ -#ifdef CONFIG_PREEMPT - RSM_PSR_I(p0, r17, r31) // disable interrupts - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 - ;; - .pred.rel.mutex pUStk,pKStk -(pKStk) ld4 r21=[r20] // r21 <- preempt_count -(pUStk) mov r21=0 // r21 <- 0 - ;; - cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) -#else - RSM_PSR_I(pUStk, r17, r31) - cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel -(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk -#endif -.work_processed_kernel: - adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; -(p6) ld4 r31=[r17] // load current_thread_info()->flags - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) - adds r30=PT(AR_CCV)+16,r12 -(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv -(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd -(p6) br.cond.spnt .work_pending - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) - mov ar.ccv=r15 - ;; - ldf.fill f11=[r2] - BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) 
- ;; -(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - .pred.rel.mutex pUStk,pKStk - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave - nop.i 0 - ;; -#else - MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled - nop.i 0 - nop.i 0 - ;; -#endif - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16 // load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r21=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 -#else -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 -#endif - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred -#endif - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] -(pUStk) mov r17=1 - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; - // mib : mov add br -> mib : ld8 add br - // bbb_ : br nop cover;; mbb_ : mov br cover;; - // - // no one require bsp in r16 if (pKStk) branch is selected. -(pUStk) st8 [r3]=r22 // save time at leave -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - ld8.fill r3=[r16] // deferred - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch - mov r16=ar.bsp // get existing backing store pointer -#else - ld8.fill r3=[r16] -(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack - shr.u r18=r19,16 // get byte size of existing "dirty" partition - ;; - mov r16=ar.bsp // get existing backing store pointer - LOAD_PHYS_STACK_REG_SIZE(r17) -(pKStk) br.cond.dpnt skip_rbs_switch -#endif - - /* - * Restore user backing store. - * - * NOTE: alloc, loadrs, and cover can't be predicated. - */ -(pNonSys) br.cond.dpnt dont_preserve_current_frame - COVER // add current frame into dirty partition and set cr.ifs - ;; - mov r19=ar.bsp // get new backing store pointer -rbs_switch: - sub r16=r16,r18 // krbs = old bsp - size of dirty partition - cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs - ;; - sub r19=r19,r16 // calculate total byte size of dirty partition - add r18=64,r18 // don't force in0-in7 into memory... - ;; - shl r19=r19,16 // shift size of dirty partition into loadrs position - ;; -dont_preserve_current_frame: - /* - * To prevent leaking bits between the kernel and user-space, - * we must clear the stacked registers in the "invalid" partition here. - * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, - * 5 registers/cycle on McKinley). 
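The clearing code that follows is easier to see in C: each recursion level allocates a fresh register frame, zeroes Nregs stacked registers by writing its locals, and recurses while more than Nregs remain. A hedged model, treating the register count as a plain byte count:

    /* Hedged model of rse_clear_invalid below; the real code zeroes the
     * frame's loc registers rather than touching memory. */
    #define NREGS_SKETCH 14                /* 10 on Itanium, 14 on McKinley */

    static void rse_clear_invalid_sketch(long bytes_to_clear, long depth)
    {
        /* this frame accounts for NREGS_SKETCH * 8 bytes of stacked regs */
        if (bytes_to_clear > NREGS_SKETCH * 8)
            rse_clear_invalid_sketch(bytes_to_clear - NREGS_SKETCH * 8,
                                     depth + 1);
        /* every level with depth != 0 then returns, popping its frame */
    }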
- */
-# define pRecurse p6
-# define pReturn p7
-#ifdef CONFIG_ITANIUM
-# define Nregs 10
-#else
-# define Nregs 14
-#endif
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r17
- mov in1=0
- ;;
- TEXT_ALIGN(32)
-rse_clear_invalid:
-#ifdef CONFIG_ITANIUM
- // cycle 0
- { .mii
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
-}{ .mfb
- add out1=1,in1 // increment recursion count
- nop.f 0
- nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
- ;;
-}{ .mfi // cycle 1
- mov loc1=0
- nop.f 0
- mov loc2=0
-}{ .mib
- mov loc3=0
- mov loc4=0
-(pRecurse) br.call.sptk.many b0=rse_clear_invalid
-
-}{ .mfi // cycle 2
- mov loc5=0
- nop.f 0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
-}{ .mib
- mov loc6=0
- mov loc7=0
-(pReturn) br.ret.sptk.many b0
-}
-#else /* !CONFIG_ITANIUM */
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
-(pRecurse) br.call.dptk.few b0=rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
-(pReturn) br.ret.dptk.many b0
-#endif /* !CONFIG_ITANIUM */
-# undef pRecurse
-# undef pReturn
- ;;
- alloc r17=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
-skip_rbs_switch:
- mov ar.unat=r25 // M2
-(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
-(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
- ;;
-(pUStk) mov ar.bspstore=r23 // M2
-(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
-(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
- ;;
- MOV_TO_IPSR(p0, r29, r25) // M2
- mov ar.pfs=r26 // I0
-(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
-
- MOV_TO_IFS(p9, r30, r25) // M2
- mov b0=r21 // I0
-(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
-
- mov ar.fpsr=r20 // M2
- MOV_TO_IIP(r28, r25) // M2
- nop 0
- ;;
-(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
- nop 0
-(pLvSys)mov r2=r0
-
- mov ar.rsc=r27 // M2
- mov pr=r31,-1 // I0
- RFI // B
-
- /*
- * On entry:
- * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
- * r31 = current->thread_info->flags
- * On exit:
- * p6 = TRUE if work-pending-check needs to be redone
- *
- * Interrupts are disabled on entry, reenabled depending on work, and
- * disabled on exit.
- */
-.work_pending_syscall:
- add r2=-8,r2
- add r3=-8,r3
- ;;
- st8 [r2]=r8
- st8 [r3]=r10
-.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed?
-(p6) br.cond.sptk.few .notify - br.call.spnt.many rp=preempt_schedule_irq -.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.notify: -(pUStk) br.call.spnt.many rp=notify_resume_user -.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) -(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end - br.cond.sptk.many .work_processed_kernel - -.global ia64_work_pending_syscall_end; -ia64_work_pending_syscall_end: - adds r2=PT(R8)+16,r12 - adds r3=PT(R10)+16,r12 - ;; - ld8 r8=[r2] - ld8 r10=[r3] - br.cond.sptk.many ia64_work_processed_syscall -END(ia64_leave_kernel) - -ENTRY(handle_syscall_error) - /* - * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could - * lead us to mistake a negative return value as a failed syscall. Those syscall - * must deposit a non-zero value in pt_regs.r8 to indicate an error. If - * pt_regs.r8 is zero, we assume that the call completed successfully. - */ - PT_REGS_UNWIND_INFO(0) - ld8 r3=[r2] // load pt_regs.r8 - ;; - cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? - ;; -(p7) mov r10=-1 -(p7) sub r8=0,r8 // negate return value to get errno - br.cond.sptk ia64_leave_syscall -END(handle_syscall_error) - - /* - * Invoke schedule_tail(task) while preserving in0-in7, which may be needed - * in case a system call gets restarted. - */ -GLOBAL_ENTRY(ia64_invoke_schedule_tail) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,1,0 - mov loc0=rp - mov out0=r8 // Address of previous task - ;; - br.call.sptk.many rp=schedule_tail -.ret11: mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(ia64_invoke_schedule_tail) - - /* - * Setup stack and call do_notify_resume_user(), keeping interrupts - * disabled. - * - * Note that pSys and pNonSys need to be set up by the caller. - * We declare 8 input registers so the system call args get preserved, - * in case we need to restart a system call. - */ -GLOBAL_ENTRY(notify_resume_user) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=0 // there is no "oldset" - adds out1=8,sp // out1=&sigscratch->ar_pfs -(pSys) mov out2=1 // out2==1 => we're in a syscall - ;; -(pNonSys) mov out2=0 // out2==0 => not a syscall - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch - .body - br.call.sptk.many rp=do_notify_resume_user -.ret15: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sigscratch->scratch_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(notify_resume_user) - -ENTRY(sys_rt_sigreturn) - PT_REGS_UNWIND_INFO(0) - /* - * Allocate 8 input registers since ptrace() may clobber them - */ - alloc r2=ar.pfs,8,0,1,0 - .prologue - PT_REGS_SAVES(16) - adds sp=-16,sp - .body - cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... - ;; - /* - * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined - * syscall-entry path does not save them we save them here instead. Note: we - * don't need to save any other registers that are not saved by the stream-lined - * syscall path, because restore_sigcontext() restores them. 
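Stepping back to handle_syscall_error above: it encodes the ia64 user-visible error convention, in which a failing syscall leaves the positive errno in r8 and sets r10 to -1, while syscalls that may legitimately return negative values signal failure by depositing a non-zero pt_regs.r8 beforehand. A hedged C restatement, names illustrative:

    /* Hedged sketch of the r8/r10 fixup done by handle_syscall_error
     * and the strace_error path earlier in this file. */
    static void fixup_syscall_error_sketch(long *r8, long *r10,
                                           long saved_r8)
    {
        if (saved_r8 != 0) {       /* handler marked this as a real error */
            *r10 = -1;             /* error indication for user space */
            *r8  = -*r8;           /* negative retval -> positive errno */
        }                          /* else: negative value was legitimate */
    }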
- */ - adds r16=PT(F6)+32,sp - adds r17=PT(F7)+32,sp - ;; - stf.spill [r16]=f6,32 - stf.spill [r17]=f7,32 - ;; - stf.spill [r16]=f8,32 - stf.spill [r17]=f9,32 - ;; - stf.spill [r16]=f10 - stf.spill [r17]=f11 - adds out0=16,sp // out0 = &sigscratch - br.call.sptk.many rp=ia64_rt_sigreturn -.ret19: .restore sp,0 - adds sp=16,sp - ;; - ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,ia64_leave_kernel - ;; - mov ar.unat=r9 - br.many b7 -END(sys_rt_sigreturn) - -GLOBAL_ENTRY(ia64_prepare_handle_unaligned) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt -.ret21: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_unaligned) - - // - // unw_init_running(void (*callback)(info, arg), void *arg) - // -# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) - -GLOBAL_ENTRY(unw_init_running) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,3,3,0 - ;; - ld8 loc2=[in0],8 - mov loc0=rp - mov r16=loc1 - DO_SAVE_SWITCH_STACK - .body - - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE - SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) - adds sp=-EXTRA_FRAME_SIZE,sp - .body - ;; - adds out0=16,sp // &info - mov out1=r13 // current - adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack - br.call.sptk.many rp=unw_init_frame_info -1: adds out0=16,sp // &info - mov b6=loc2 - mov loc2=gp // save gp across indirect function call - ;; - ld8 gp=[in0] - mov out1=in1 // arg - br.call.sptk.many rp=b6 // invoke the callback function -1: mov gp=loc2 // restore gp - - // For now, we don't allow changing registers from within - // unw_init_running; if we ever want to allow that, we'd - // have to do a load_switch_stack here: - .restore sp - adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp - - mov ar.pfs=loc1 - mov rp=loc0 - br.ret.sptk.many rp -END(unw_init_running) -EXPORT_SYMBOL(unw_init_running) - -#ifdef CONFIG_FUNCTION_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE -GLOBAL_ENTRY(_mcount) - br ftrace_stub -END(_mcount) -EXPORT_SYMBOL(_mcount) - -.here: - br.ret.sptk.many b0 - -GLOBAL_ENTRY(ftrace_caller) - alloc out0 = ar.pfs, 8, 0, 4, 0 - mov out3 = r0 - ;; - mov out2 = b0 - add r3 = 0x20, r3 - mov out1 = r1; - br.call.sptk.many b0 = ftrace_patch_gp - //this might be called from module, so we must patch gp -ftrace_patch_gp: - movl gp=__gp - mov b0 = r3 - ;; -.global ftrace_call; -ftrace_call: -{ - .mlx - nop.m 0x0 - movl r3 = .here;; -} - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(ftrace_caller) - -#else -GLOBAL_ENTRY(_mcount) - movl r2 = ftrace_stub - movl r3 = ftrace_trace_function;; - ld8 r3 = [r3];; - ld8 r3 = [r3];; - cmp.eq p7,p0 = r2, r3 -(p7) br.sptk.many ftrace_stub - ;; - - alloc loc0 = ar.pfs, 4, 4, 2, 0 - ;; - mov loc1 = b0 - mov out0 = b0 - mov loc2 = r8 - mov loc3 = r15 - ;; - adds out0 = -MCOUNT_INSN_SIZE, out0 - mov out1 = in2 - mov b6 = r3 - - br.call.sptk.many b0 = b6 - ;; - mov ar.pfs = loc0 - mov b0 = loc1 - mov r8 = loc2 - mov r15 = loc3 - br ftrace_stub - ;; -END(_mcount) -#endif - -GLOBAL_ENTRY(ftrace_stub) - mov r3 = b0 - movl r2 = 
_mcount_ret_helper - ;; - mov b6 = r2 - mov b7 = r3 - br.ret.sptk.many b6 - -_mcount_ret_helper: - mov b0 = r42 - mov r1 = r41 - mov ar.pfs = r40 - br b7 -END(ftrace_stub) - -#endif /* CONFIG_FUNCTION_TRACER */ - -#define __SYSCALL(nr, entry, nargs) data8 entry - .rodata - .align 8 - .globl sys_call_table -sys_call_table: -#include -#undef __SYSCALL diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S deleted file mode 100644 index 821e68d1059874454efb1cf0a845a668de4990ef..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/esi_stub.S +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * ESI call stub. - * - * Copyright (C) 2005 Hewlett-Packard Co - * Alex Williamson - * - * Based on EFI call stub by David Mosberger. The stub is virtually - * identical to the one for EFI phys-mode calls, except that ESI - * calls may have up to 8 arguments, so they get passed to this routine - * through memory. - * - * This stub allows us to make ESI calls in physical mode with interrupts - * turned off. ESI calls may not support calling from virtual mode. - * - * Google for "Extensible SAL specification" for a document describing the - * ESI standard. - */ - -/* - * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System - * Abstraction Layer Specification", revision 2.6e). Note that - * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. - * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call - * (the br.ia instruction fails unless psr.dfl and psr.dfh are - * cleared). Fortunately, SAL promises not to touch the floating - * point regs, so at least we don't have to save f2-f127. - */ -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) - -#define PSR_BITS_TO_SET \ - (IA64_PSR_BN) - -#include -#include -#include - -/* - * Inputs: - * in0 = address of function descriptor of ESI routine to call - * in1 = address of array of ESI parameters - * - * Outputs: - * r8 = result returned by called function - */ -GLOBAL_ENTRY(esi_call_phys) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) - alloc loc1=ar.pfs,2,7,8,0 - ld8 r2=[in0],8 // load ESI function's entry point - mov loc0=rp - .body - ;; - ld8 out0=[in1],8 // ESI params loaded from array - ;; // passing all as inputs doesn't work - ld8 out1=[in1],8 - ;; - ld8 out2=[in1],8 - ;; - ld8 out3=[in1],8 - ;; - ld8 out4=[in1],8 - ;; - ld8 out5=[in1],8 - ;; - ld8 out6=[in1],8 - ;; - ld8 out7=[in1] - mov loc2=gp // save global pointer - mov loc4=ar.rsc // save RSE configuration - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - ;; - ld8 gp=[in0] // load ESI function's global pointer - movl r16=PSR_BITS_TO_CLEAR - mov loc3=psr // save processor status word - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 - mov b6=r2 - ;; - andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared - br.call.sptk.many rp=ia64_switch_mode_phys -.ret0: mov loc5=r19 // old ar.bsp - mov loc6=r20 // old sp - br.call.sptk.many rp=b6 // call the ESI function -.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode - mov r16=loc3 // save virtual mode psr - mov r19=loc5 // save virtual mode bspstore - mov r20=loc6 // save virtual mode sp - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode -.ret2: mov ar.rsc=loc4 // restore RSE configuration - mov ar.pfs=loc1 - mov rp=loc0 - mov gp=loc2 - br.ret.sptk.many rp -END(esi_call_phys) 
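Because ESI calls can take up to eight arguments, the stub above receives them through memory rather than registers. A hedged sketch of the marshalling a C caller would do; the prototype shown is illustrative (the real declaration and return type live in the ia64 ESI support code):

    #include <stdint.h>

    extern long esi_call_phys(void *fdesc, uint64_t *args); /* stub above */

    static long esi_call_sketch(void *fdesc,
                                uint64_t a0, uint64_t a1, uint64_t a2,
                                uint64_t a3, uint64_t a4, uint64_t a5,
                                uint64_t a6, uint64_t a7)
    {
        /* the stub loads args[0..7] into out0-out7 with eight ld8s */
        uint64_t args[8] = { a0, a1, a2, a3, a4, a5, a6, a7 };

        return esi_call_phys(fdesc, args);
    }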
-EXPORT_SYMBOL_GPL(esi_call_phys) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S deleted file mode 100644 index 0750a716adc72a5f5d5f6cb502448a74da2d18db..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/fsys.S +++ /dev/null @@ -1,837 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the light-weight system call handlers (fsyscall-handlers). - * - * Copyright (C) 2003 Hewlett-Packard Co - * David Mosberger-Tang - * - * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). - * 18-Feb-03 louisk Implement fsys_gettimeofday(). - * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, - * probably broke it along the way... ;-) - * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make - * it capable of using memory based clocks without falling back to C code. - * 08-Feb-07 Fenghua Yu Implement fsys_getcpu. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "entry.h" -#include - -/* - * See Documentation/ia64/fsys.rst for details on fsyscalls. - * - * On entry to an fsyscall handler: - * r10 = 0 (i.e., defaults to "successful syscall return") - * r11 = saved ar.pfs (a user-level value) - * r15 = system call number - * r16 = "current" task pointer (in normal kernel-mode, this is in r13) - * r32-r39 = system call arguments - * b6 = return address (a user-level value) - * ar.pfs = previous frame-state (a user-level value) - * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) - * all other registers may contain values passed in from user-mode - * - * On return from an fsyscall handler: - * r11 = saved ar.pfs (as passed into the fsyscall handler) - * r15 = system call number (as passed into the fsyscall handler) - * r32-r39 = system call arguments (as passed into the fsyscall handler) - * b6 = return address (as passed into the fsyscall handler) - * ar.pfs = previous frame-state (as passed into the fsyscall handler) - */ - -ENTRY(fsys_ni_syscall) - .prologue - .altrp b6 - .body - mov r8=ENOSYS - mov r10=-1 - FSYS_RETURN -END(fsys_ni_syscall) - -ENTRY(fsys_getpid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_SIGNAL_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->signal - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - ld4 r9=[r9] - add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 - ;; - and r9=TIF_ALLWORK_MASK,r9 - ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] - ;; - add r8=IA64_PID_LEVEL_OFFSET,r17 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - mov r17=0 - ;; - cmp.ne p8,p0=0,r9 -(p8) br.spnt.many fsys_fallback_syscall - FSYS_RETURN -END(fsys_getpid) - -ENTRY(fsys_set_tid_address) - .prologue - .altrp b6 - .body - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - add r17=IA64_TASK_THREAD_PID_OFFSET,r16 - ;; - ld4 r9=[r9] - tnat.z p6,p7=r32 // check argument register for being NaT - ld8 r17=[r17] // r17 = current->thread_pid - ;; - and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_PID_LEVEL_OFFSET,r17 - add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 - ;; - ld4 r8=[r8] // r8 = pid->level - add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] - ;; - shl r8=r8,IA64_UPID_SHIFT - ;; - add r17=r17,r8 // r17 = &pid->numbers[pid->level] - ;; - ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr - ;; - cmp.ne p8,p0=0,r9 - mov r17=-1 - ;; -(p6) 
st8 [r18]=r32
-(p7) st8 [r18]=r17
-(p8) br.spnt.many fsys_fallback_syscall
- ;;
- mov r17=0 // I must not leak kernel bits...
- mov r18=0 // I must not leak kernel bits...
- FSYS_RETURN
-END(fsys_set_tid_address)
-
-#if IA64_GTOD_SEQ_OFFSET !=0
-#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
-#endif
-#if IA64_ITC_JITTER_OFFSET !=0
-#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
-#endif
-#define CLOCK_REALTIME 0
-#define CLOCK_MONOTONIC 1
-#define CLOCK_DIVIDE_BY_1000 0x4000
-#define CLOCK_ADD_MONOTONIC 0x8000
-
-ENTRY(fsys_gettimeofday)
- .prologue
- .altrp b6
- .body
- mov r31 = r32
- tnat.nz p6,p0 = r33 // guard against NaT argument
-(p6) br.cond.spnt.few .fail_einval
- mov r30 = CLOCK_DIVIDE_BY_1000
- ;;
-.gettime:
- // Register map
- // Incoming r31 = pointer to address where to place result
- // r30 = flags determining how time is processed
- // r2,r3 = temp r4-r7 preserved
- // r8 = result nanoseconds
- // r9 = result seconds
- // r10 = temporary storage for clock difference
- // r11 = preserved: saved ar.pfs
- // r12 = preserved: memory stack
- // r13 = preserved: thread pointer
- // r14 = address of mask / mask value
- // r15 = preserved: system call number
- // r16 = preserved: current task pointer
- // r17 = (not used)
- // r18 = (not used)
- // r19 = address of itc_lastcycle
- // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
- // r21 = address of mmio_ptr
- // r22 = address of wall_time or monotonic_time
- // r23 = address of shift / value
- // r24 = address mult factor / cycle_last value
- // r25 = itc_lastcycle value
- // r26 = address clocksource cycle_last
- // r27 = (not used)
- // r28 = sequence number at the beginning of critical section
- // r29 = address of itc_jitter
- // r30 = time processing flags / memory address
- // r31 = pointer to result
- // Predicates
- // p6,p7 short term use
- // p8 = timesource ar.itc
- // p9 = timesource mmio64
- // p10 = timesource mmio32 - not used
- // p11 = timesource not to be handled by asm code
- // p12 = memory time source ( = p9 | p10) - not used
- // p13 = do cmpxchg with itc_lastcycle
- // p14 = Divide by 1000
- // p15 = Add monotonic
- //
- // Note that instructions are optimized for McKinley. McKinley can
- // process two bundles simultaneously and therefore we continuously
- // try to feed the CPU two bundles and then a stop.
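The critical section that follows is a hand-scheduled seqlock read; here is a hedged C model of its control flow. Field and helper names are illustrative, and unlike the real code, which keeps the nanosecond base pre-shifted and shifts the sum, this version shifts only the scaled cycle delta:

    #include <stdint.h>

    struct gtod_sketch {
        volatile uint32_t seq;                   /* gtod_lock.sequence */
        uint64_t cycle_last, mask, mult, shift;
        uint64_t sec, nsec;
    };

    static uint64_t read_counter_sketch(void) { return 0; } /* stand-in for ar.itc / MMIO */

    static void gettime_sketch(struct gtod_sketch *g,
                               uint64_t *sec, uint64_t *nsec)
    {
        uint32_t seq;

        do {
            seq = g->seq & ~1u;          /* odd (writer active) never matches -> retry */
            uint64_t delta = (read_counter_sketch() - g->cycle_last) & g->mask;
            *sec  = g->sec;
            *nsec = g->nsec + ((delta * g->mult) >> g->shift);
        } while (g->seq != seq);         /* sequence changed -> redo the read */
    }

The CLOCK_DIVIDE_BY_1000 path further down then converts nanoseconds to microseconds without a divide: ns/1000 = (ns/8)/125, and the multiply by 2361183241434822607 (that is, ceil(2^68/125)) followed by the two right shifts (64 implicit in xmpy.hu, 4 explicit) is exactly that division by 125.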
- - add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r31 // guard against Nat argument -(p6) br.cond.spnt.few .fail_einval - movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address - ;; - ld4 r2 = [r2] // process work pending flags - movl r29 = itc_jitter_data // itc_jitter - add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - mov pr = r30,0xc000 // Set predicates according to function - ;; - and r2 = TIF_ALLWORK_MASK,r2 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time - ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last - cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall - ;; - // Begin critical section -.time_redo: - ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first - ;; - and r28 = ~1,r28 // And make sequence even to force retry if odd - ;; - ld8 r30 = [r21] // clocksource->mmio_ptr - add r24 = IA64_CLKSRC_MULT_OFFSET,r20 - ld4 r2 = [r29] // itc_jitter value - add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 - add r14 = IA64_CLKSRC_MASK_OFFSET,r20 - ;; - ld4 r3 = [r24] // clocksource mult value - ld8 r14 = [r14] // clocksource mask value - cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr - ;; - setf.sig f7 = r3 // Setup for mult scaling of counter -(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 - ld4 r23 = [r23] // clocksource shift value - ld8 r24 = [r26] // get clksrc_cycle_last value -(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control - ;; - .pred.rel.mutex p8,p9 - MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! -(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. -(p13) ld8 r25 = [r19] // get itc_lastcycle value - ld8 r9 = [r22],IA64_TIME_SN_SPEC_SNSEC_OFFSET // sec - ;; - ld8 r8 = [r22],-IA64_TIME_SN_SPEC_SNSEC_OFFSET // snsec -(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) - ;; -(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared - sub r10 = r2,r24 // current_cycle - last_cycle - ;; -(p6) sub r10 = r25,r24 // time we got was less than last_cycle -(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg - ;; -(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv - ;; -(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful - ;; -(p7) sub r10 = r3,r24 // then use new last_cycle instead - ;; - and r10 = r10,r14 // Apply mask - ;; - setf.sig f8 = r10 - nop.i 123 - ;; - // fault check takes 5 cycles and we have spare time -EX(.fail_efault, probe.w.fault r31, 3) - xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) - ;; - getf.sig r2 = f8 - mf - ;; - ld4 r10 = [r20] // gtod_lock.sequence - add r8 = r8,r2 // Add xtime.nsecs - ;; - shr.u r8 = r8,r23 // shift by factor - cmp4.ne p7,p0 = r28,r10 -(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo - // End critical section. - // Now r8=tv->tv_nsec and r9=tv->tv_sec - mov r10 = r0 - movl r2 = 1000000000 - add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 -(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack - ;; -.time_normalize: - mov r21 = r8 - cmp.ge p6,p0 = r8,r2 -(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time - ;; -(p14) setf.sig f8 = r20 -(p6) sub r8 = r8,r2 -(p6) add r9 = 1,r9 // two nops before the branch. -(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod -(p6) br.cond.dpnt.few .time_normalize - ;; - // Divided by 8 though shift. 
Now divide by 125 - // The compiler was able to do that with a multiply - // and a shift and we do the same -EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles -(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it - ;; -(p14) getf.sig r2 = f8 - ;; - mov r8 = r0 -(p14) shr.u r21 = r2, 4 - ;; -EX(.fail_efault, st8 [r31] = r9) -EX(.fail_efault, st8 [r23] = r21) - FSYS_RETURN -.fail_einval: - mov r8 = EINVAL - mov r10 = -1 - FSYS_RETURN -.fail_efault: - mov r8 = EFAULT - mov r10 = -1 - FSYS_RETURN -END(fsys_gettimeofday) - -ENTRY(fsys_clock_gettime) - .prologue - .altrp b6 - .body - cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 - // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC -(p6) br.spnt.few fsys_fallback_syscall - mov r31 = r33 - shl r30 = r32,15 - br.many .gettime -END(fsys_clock_gettime) - -/* - * fsys_getcpu doesn't use the third parameter in this implementation. It reads - * current_thread_info()->cpu and corresponding node in cpu_to_node_map. - */ -ENTRY(fsys_getcpu) - .prologue - .altrp b6 - .body - ;; - add r2=TI_FLAGS+IA64_TASK_SIZE,r16 - tnat.nz p6,p0 = r32 // guard against NaT argument - add r3=TI_CPU+IA64_TASK_SIZE,r16 - ;; - ld4 r3=[r3] // M r3 = thread_info->cpu - ld4 r2=[r2] // M r2 = thread_info->flags -(p6) br.cond.spnt.few .fail_einval // B - ;; - tnat.nz p7,p0 = r33 // I guard against NaT argument -(p7) br.cond.spnt.few .fail_einval // B - ;; - cmp.ne p6,p0=r32,r0 - cmp.ne p7,p0=r33,r0 - ;; -#ifdef CONFIG_NUMA - movl r17=cpu_to_node_map - ;; -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - shladd r18=r3,1,r17 - ;; - ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r20) - mov r8=0 - ;; -#else -EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles - and r2 = TIF_ALLWORK_MASK,r2 - ;; - cmp.ne p8,p0=0,r2 -(p8) br.spnt.many fsys_fallback_syscall - ;; -EX(.fail_efault, (p6) st4 [r32] = r3) -EX(.fail_efault, (p7) st2 [r33] = r0) - mov r8=0 - ;; -#endif - FSYS_RETURN -END(fsys_getcpu) - -ENTRY(fsys_fallback_syscall) - .prologue - .altrp b6 - .body - /* - * We only get here from light-weight syscall handlers. Thus, we already - * know that r15 contains a valid syscall number. No need to re-check. - */ - adds r17=-1024,r15 - movl r14=sys_call_table - ;; - RSM_PSR_I(p0, r26, r27) - shladd r18=r17,3,r14 - ;; - ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point - MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) - mov r27=ar.rsc - mov r21=ar.fpsr - mov r26=ar.pfs -END(fsys_fallback_syscall) - /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) - .prologue - .altrp b6 - .body - /* - * We get here for syscalls that don't have a lightweight - * handler. For those, we need to bubble down into the kernel - * and that requires setting up a minimal pt_regs structure, - * and initializing the CPU state more or less as if an - * interruption had occurred. To make syscall-restarts work, - * we setup pt_regs such that cr_iip points to the second - * instruction in syscall_via_break. Decrementing the IP - * hence will restart the syscall via break and not - * decrementing IP will return us to the caller, as usual. 
- * Note that we preserve the value of psr.pp rather than - * initializing it from dcr.pp. This makes it possible to - * distinguish fsyscall execution from other privileged - * execution. - * - * On entry: - * - normal fsyscall handler register usage, except - * that we also have: - * - r18: address of syscall entry point - * - r21: ar.fpsr - * - r26: ar.pfs - * - r27: ar.rsc - * - r29: psr - * - * We used to clear some PSR bits here but that requires slow - * serialization. Fortuntely, that isn't really necessary. - * The rationale is as follows: we used to clear bits - * ~PSR_PRESERVED_BITS in PSR.L. Since - * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we - * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. - * However, - * - * PSR.BE : already is turned off in __kernel_syscall_via_epc() - * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets - * invoked - * PSR.DFL: always 0 (kernel never turns it on) - * PSR.DFH: don't care --- kernel never touches f32-f127 on its own - * initiative - * PSR.DI : always 0 (kernel never turns it on) - * PSR.SI : always 0 (kernel never turns it on) - * PSR.DB : don't care --- kernel never enables kernel-level - * breakpoints - * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down - * will trigger a taken branch; the taken-trap-handler then - * converts the syscall into a break-based system-call. - */ - /* - * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. - * The rest we have to synthesize. - */ -# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ - | (0x1 << IA64_PSR_RI_BIT) \ - | IA64_PSR_BN | IA64_PSR_I) - - invala // M0|1 - movl r14=ia64_ret_from_syscall // X - - nop.m 0 - movl r28=__kernel_syscall_via_break // X create cr.iip - ;; - - mov r2=r16 // A get task addr to addl-addressable register - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A - mov r31=pr // I0 save pr (2 cyc) - ;; - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag - addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS - add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A - ;; - ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store - nop.i 0 - ;; - mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting -#else - nop.m 0 -#endif - nop.i 0 - ;; - mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore - mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) 
- nop.i 0 - ;; - mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS - movl r8=PSR_ONE_BITS // X - ;; - mov r25=ar.unat // M2 (5 cyc) save ar.unat - mov r19=b6 // I0 save b6 (2 cyc) - mov r20=r1 // A save caller's gp in r20 - ;; - or r29=r8,r29 // A construct cr.ipsr value to save - mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack - - mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) - cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 - br.call.sptk.many b7=ia64_syscall_setup // B - ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r30,r19 // elapsed time in user mode - ;; - add r20=r20,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r20 // update stime - st8 [r17]=r21 // update utime - ;; -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - mov rp=r14 // I0 set the real return addr - and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A - ;; - SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs - cmp.eq p8,p0=r3,r0 // A -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - nop.m 0 -(p8) br.call.sptk.many b6=b6 // B (ignore return address) - br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) - - .rodata - .align 8 - .globl fsyscall_table - - data8 fsys_bubble_down -fsyscall_table: - data8 fsys_ni_syscall - data8 0 // exit // 1025 - data8 0 // read - data8 0 // write - data8 0 // open - data8 0 // close - data8 0 // creat // 1030 - data8 0 // link - data8 0 // unlink - data8 0 // execve - data8 0 // chdir - data8 0 // fchdir // 1035 - data8 0 // utimes - data8 0 // mknod - data8 0 // chmod - data8 0 // chown - data8 0 // lseek // 1040 - data8 fsys_getpid // getpid - data8 0 // getppid - data8 0 // mount - data8 0 // umount - data8 0 // setuid // 1045 - data8 0 // getuid - data8 0 // geteuid - data8 0 // ptrace - data8 0 // access - data8 0 // sync // 1050 - data8 0 // fsync - data8 0 // fdatasync - data8 0 // kill - data8 0 // rename - data8 0 // mkdir // 1055 - data8 0 // rmdir - data8 0 // dup - data8 0 // pipe - data8 0 // times - data8 0 // brk // 1060 - data8 0 // setgid - data8 0 // getgid - data8 0 // getegid - data8 0 // acct - data8 0 // ioctl // 1065 - data8 0 // fcntl - data8 0 // umask - data8 0 // chroot - data8 0 // ustat - data8 0 // dup2 // 1070 - data8 0 // setreuid - data8 0 // setregid - data8 0 // getresuid - data8 0 // setresuid - data8 0 // getresgid // 1075 - data8 0 // setresgid - data8 0 // getgroups - data8 0 // setgroups - data8 0 // getpgid - data8 0 // setpgid // 1080 - data8 0 // setsid - data8 0 // getsid - data8 0 // sethostname - data8 0 // setrlimit - data8 0 // getrlimit // 1085 - data8 0 // getrusage - data8 fsys_gettimeofday // gettimeofday - data8 0 // settimeofday - data8 0 // select - data8 0 // poll // 1090 - data8 0 // symlink - data8 0 // readlink - data8 0 // uselib - data8 0 // swapon - data8 0 // swapoff // 1095 - data8 0 // reboot - data8 0 // truncate - data8 0 // ftruncate - data8 0 // fchmod 
- data8 0 // fchown // 1100 - data8 0 // getpriority - data8 0 // setpriority - data8 0 // statfs - data8 0 // fstatfs - data8 0 // gettid // 1105 - data8 0 // semget - data8 0 // semop - data8 0 // semctl - data8 0 // msgget - data8 0 // msgsnd // 1110 - data8 0 // msgrcv - data8 0 // msgctl - data8 0 // shmget - data8 0 // shmat - data8 0 // shmdt // 1115 - data8 0 // shmctl - data8 0 // syslog - data8 0 // setitimer - data8 0 // getitimer - data8 0 // 1120 - data8 0 - data8 0 - data8 0 // vhangup - data8 0 // lchown - data8 0 // remap_file_pages // 1125 - data8 0 // wait4 - data8 0 // sysinfo - data8 0 // clone - data8 0 // setdomainname - data8 0 // newuname // 1130 - data8 0 // adjtimex - data8 0 - data8 0 // init_module - data8 0 // delete_module - data8 0 // 1135 - data8 0 - data8 0 // quotactl - data8 0 // bdflush - data8 0 // sysfs - data8 0 // personality // 1140 - data8 0 // afs_syscall - data8 0 // setfsuid - data8 0 // setfsgid - data8 0 // getdents - data8 0 // flock // 1145 - data8 0 // readv - data8 0 // writev - data8 0 // pread64 - data8 0 // pwrite64 - data8 0 // sysctl // 1150 - data8 0 // mmap - data8 0 // munmap - data8 0 // mlock - data8 0 // mlockall - data8 0 // mprotect // 1155 - data8 0 // mremap - data8 0 // msync - data8 0 // munlock - data8 0 // munlockall - data8 0 // sched_getparam // 1160 - data8 0 // sched_setparam - data8 0 // sched_getscheduler - data8 0 // sched_setscheduler - data8 0 // sched_yield - data8 0 // sched_get_priority_max // 1165 - data8 0 // sched_get_priority_min - data8 0 // sched_rr_get_interval - data8 0 // nanosleep - data8 0 // nfsservctl - data8 0 // prctl // 1170 - data8 0 // getpagesize - data8 0 // mmap2 - data8 0 // pciconfig_read - data8 0 // pciconfig_write - data8 0 // perfmonctl // 1175 - data8 0 // sigaltstack - data8 0 // rt_sigaction - data8 0 // rt_sigpending - data8 0 // rt_sigprocmask - data8 0 // rt_sigqueueinfo // 1180 - data8 0 // rt_sigreturn - data8 0 // rt_sigsuspend - data8 0 // rt_sigtimedwait - data8 0 // getcwd - data8 0 // capget // 1185 - data8 0 // capset - data8 0 // sendfile - data8 0 - data8 0 - data8 0 // socket // 1190 - data8 0 // bind - data8 0 // connect - data8 0 // listen - data8 0 // accept - data8 0 // getsockname // 1195 - data8 0 // getpeername - data8 0 // socketpair - data8 0 // send - data8 0 // sendto - data8 0 // recv // 1200 - data8 0 // recvfrom - data8 0 // shutdown - data8 0 // setsockopt - data8 0 // getsockopt - data8 0 // sendmsg // 1205 - data8 0 // recvmsg - data8 0 // pivot_root - data8 0 // mincore - data8 0 // madvise - data8 0 // newstat // 1210 - data8 0 // newlstat - data8 0 // newfstat - data8 0 // clone2 - data8 0 // getdents64 - data8 0 // getunwind // 1215 - data8 0 // readahead - data8 0 // setxattr - data8 0 // lsetxattr - data8 0 // fsetxattr - data8 0 // getxattr // 1220 - data8 0 // lgetxattr - data8 0 // fgetxattr - data8 0 // listxattr - data8 0 // llistxattr - data8 0 // flistxattr // 1225 - data8 0 // removexattr - data8 0 // lremovexattr - data8 0 // fremovexattr - data8 0 // tkill - data8 0 // futex // 1230 - data8 0 // sched_setaffinity - data8 0 // sched_getaffinity - data8 fsys_set_tid_address // set_tid_address - data8 0 // fadvise64_64 - data8 0 // tgkill // 1235 - data8 0 // exit_group - data8 0 // lookup_dcookie - data8 0 // io_setup - data8 0 // io_destroy - data8 0 // io_getevents // 1240 - data8 0 // io_submit - data8 0 // io_cancel - data8 0 // epoll_create - data8 0 // epoll_ctl - data8 0 // epoll_wait // 1245 - data8 0 // restart_syscall - data8 
0 // semtimedop - data8 0 // timer_create - data8 0 // timer_settime - data8 0 // timer_gettime // 1250 - data8 0 // timer_getoverrun - data8 0 // timer_delete - data8 0 // clock_settime - data8 fsys_clock_gettime // clock_gettime - data8 0 // clock_getres // 1255 - data8 0 // clock_nanosleep - data8 0 // fstatfs64 - data8 0 // statfs64 - data8 0 // mbind - data8 0 // get_mempolicy // 1260 - data8 0 // set_mempolicy - data8 0 // mq_open - data8 0 // mq_unlink - data8 0 // mq_timedsend - data8 0 // mq_timedreceive // 1265 - data8 0 // mq_notify - data8 0 // mq_getsetattr - data8 0 // kexec_load - data8 0 // vserver - data8 0 // waitid // 1270 - data8 0 // add_key - data8 0 // request_key - data8 0 // keyctl - data8 0 // ioprio_set - data8 0 // ioprio_get // 1275 - data8 0 // move_pages - data8 0 // inotify_init - data8 0 // inotify_add_watch - data8 0 // inotify_rm_watch - data8 0 // migrate_pages // 1280 - data8 0 // openat - data8 0 // mkdirat - data8 0 // mknodat - data8 0 // fchownat - data8 0 // futimesat // 1285 - data8 0 // newfstatat - data8 0 // unlinkat - data8 0 // renameat - data8 0 // linkat - data8 0 // symlinkat // 1290 - data8 0 // readlinkat - data8 0 // fchmodat - data8 0 // faccessat - data8 0 - data8 0 // 1295 - data8 0 // unshare - data8 0 // splice - data8 0 // set_robust_list - data8 0 // get_robust_list - data8 0 // sync_file_range // 1300 - data8 0 // tee - data8 0 // vmsplice - data8 0 - data8 fsys_getcpu // getcpu // 1304 - - // fill in zeros for the remaining entries - .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S deleted file mode 100644 index b3ef1c72e132b44e00ade63410380370e25114e2..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate-data.S +++ /dev/null @@ -1,3 +0,0 @@ - .section .data..gate, "aw" - - .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S deleted file mode 100644 index 9f235cd551abf0479377943f86a036129c438928..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate.S +++ /dev/null @@ -1,380 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file contains the code that gets mapped at the upper end of each task's text - * region. For now, it contains the signal trampoline code only. - * - * Copyright (C) 1999-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, - * complications with the linker (which likes to create PLT stubs for branches - * to targets outside the shared object) and to avoid multi-phase kernel builds, we - * simply create minimalistic "patch lists" in special ELF sections. - */ - .section ".data..patch.fsyscall_table", "a" - .previous -#define LOAD_FSYSCALL_TABLE(reg) \ -[1:] movl reg=0; \ - .xdata4 ".data..patch.fsyscall_table", 1b-. - - .section ".data..patch.brl_fsys_bubble_down", "a" - .previous -#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ -[1:](pr)brl.cond.sptk 0; \ - ;; \ - .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. - -GLOBAL_ENTRY(__kernel_syscall_via_break) - .prologue - .altrp b6 - .body - /* - * Note: for (fast) syscall restart to work, the break instruction must be - * the first one in the bundle addressed by syscall_via_break. 
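The patch-list machinery above (the .xdata4 records emitted by LOAD_FSYSCALL_TABLE and BRL_COND_FSYS_BUBBLE_DOWN) leaves a "movl reg=0" or "brl 0" placeholder in the text and records its location as a self-relative 32-bit offset in a dedicated section. At boot the kernel walks each list and splices the real address into the placeholder bundle. A rough sketch of that walk, loosely modeled on arch/ia64/kernel/patch.c (the loop shape and the ia64_patch_imm64() helper are the kernel's; the rest is simplified for illustration):

    typedef int s32;
    typedef unsigned long u64;

    /* Kernel helper that rewrites the 64-bit immediate of a movl bundle. */
    extern void ia64_patch_imm64(u64 insn_addr, u64 val);

    /* Each patch-list entry is ".xdata4 ..., 1b-.": the offset from the
     * entry itself back to the bundle that needs patching. */
    static void patch_fsyscall_table(s32 *start, s32 *end, u64 table_addr)
    {
        for (s32 *offp = start; offp < end; ++offp) {
            u64 ip = (u64)((char *)offp + *offp);  /* bundle to patch */
            ia64_patch_imm64(ip, table_addr);
        }
    }

This is what lets the gate page avoid runtime relocations entirely, as the comment above explains.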
- */ -{ .mib - break 0x100000 - nop.i 0 - br.ret.sptk.many b6 -} -END(__kernel_syscall_via_break) - -# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) -# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) -# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) -# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) -# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) - -# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET -# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET -# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET -# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET -# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET -# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET -# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET -# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET -# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET -# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET -# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET -# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET -# define base0 r2 -# define base1 r3 - /* - * When we get here, the memory stack looks like this: - * - * +===============================+ - * | | - * // struct sigframe // - * | | - * +-------------------------------+ <-- sp+16 - * | 16 byte of scratch | - * | space | - * +-------------------------------+ <-- sp - * - * The register stack looks _exactly_ the way it looked at the time the signal - * occurred. In other words, we're treading on a potential mine-field: each - * incoming general register may be a NaT value (including sp, in which case the - * process ends up dying with a SIGSEGV). - * - * The first thing need to do is a cover to get the registers onto the backing - * store. Once that is done, we invoke the signal handler which may modify some - * of the machine state. After returning from the signal handler, we return - * control to the previous context by executing a sigreturn system call. A signal - * handler may call the rt_sigreturn() function to directly return to a given - * sigcontext. However, the user-level sigreturn() needs to do much more than - * calling the rt_sigreturn() system call as it needs to unwind the stack to - * restore preserved registers that may have been saved on the signal handler's - * call stack. - */ - -#define SIGTRAMP_SAVES \ - .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ - .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ - .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ - .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ - .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ - .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ - .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ - .vframesp SP_OFF+SIGCONTEXT_OFF - -GLOBAL_ENTRY(__kernel_sigtramp) - // describe the state that is active when we get here: - .prologue - SIGTRAMP_SAVES - .body - - .label_state 1 - - adds base0=SIGHANDLER_OFF,sp - adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp - br.call.sptk.many rp=1f -1: - ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel - ld8 r15=[base1] // get address of new RBS base (or NULL) - cover // push args in interrupted frame onto backing store - ;; - cmp.ne p1,p0=r15,r0 // do we need to switch rbs? 
(note: pr is saved by kernel) - mov.m r9=ar.bsp // fetch ar.bsp - .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF -(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) -back_from_setup_rbs: - alloc r8=ar.pfs,0,0,3,0 - ld8 out0=[base0],16 // load arg0 (signum) - adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 - ;; - ld8 out1=[base1] // load arg1 (siginfop) - ld8 r10=[r17],8 // get signal handler entry point - ;; - ld8 out2=[base0] // load arg2 (sigcontextp) - ld8 gp=[r17] // get signal handler's global pointer - adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF - st8 [base0]=r9 // save sc_ar_bsp - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - stf.spill [base0]=f6,32 - stf.spill [base1]=f7,32 - ;; - stf.spill [base0]=f8,32 - stf.spill [base1]=f9,32 - mov b6=r10 - ;; - stf.spill [base0]=f10,32 - stf.spill [base1]=f11,32 - ;; - stf.spill [base0]=f12,32 - stf.spill [base1]=f13,32 - ;; - stf.spill [base0]=f14,32 - stf.spill [base1]=f15,32 - br.call.sptk.many rp=b6 // call the signal handler -.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r15=[base0] // fetch sc_ar_bsp - mov r14=ar.bsp - ;; - cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? -(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) - ;; -back_from_restore_rbs: - adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp - adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp - ;; - ldf.fill f6=[base0],32 - ldf.fill f7=[base1],32 - ;; - ldf.fill f8=[base0],32 - ldf.fill f9=[base1],32 - ;; - ldf.fill f10=[base0],32 - ldf.fill f11=[base1],32 - ;; - ldf.fill f12=[base0],32 - ldf.fill f13=[base1],32 - ;; - ldf.fill f14=[base0],32 - ldf.fill f15=[base1],32 - mov r15=__NR_rt_sigreturn - .restore sp // pop .prologue - break __BREAK_SYSCALL - - .prologue - SIGTRAMP_SAVES -setup_rbs: - mov ar.rsc=0 // put RSE into enforced lazy mode - ;; - .save ar.rnat, r19 - mov r19=ar.rnat // save RNaT before switching backing store area - adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp - - mov r18=ar.bspstore - mov ar.bspstore=r15 // switch over to new register backing store area - ;; - - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - st8 [r14]=r19 // save sc_ar_rnat - .body - mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 - adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp - ;; - invala - sub r15=r16,r15 - extr.u r20=r18,3,6 - ;; - mov ar.rsc=0xf // set RSE into eager mode, pl 3 - cmp.eq p8,p0=63,r20 - shl r15=r15,16 - ;; - st8 [r14]=r15 // save sc_loadrs -(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now - .restore sp // pop .prologue - br.cond.sptk back_from_setup_rbs - - .prologue - SIGTRAMP_SAVES - .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF - .body -restore_rbs: - // On input: - // r14 = bsp1 (bsp at the time of return from signal handler) - // r15 = bsp0 (bsp at the time the signal occurred) - // - // Here, we need to calculate bspstore0, the value that ar.bspstore needs - // to be set to, based on bsp0 and the size of the dirty partition on - // the alternate stack (sc_loadrs >> 16). This can be done with the - // following algorithm: - // - // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); - // - // This is what the code below does. 
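The rse_num_regs()/rse_skip_regs() helpers in that formula account for the RSE's NaT-collection slots: every 64th 8-byte slot of a register backing store (the slot whose number, taken from bits 3..8 of the address, is 0x3f) holds collected NaT bits rather than a stacked register, which is why the assembly below ends up dividing by 63. A C rendering of the helpers, following the definitions in the kernel's asm/rse.h (modulo the ia64_ prefixes):

    #include <stdint.h>

    /* Slot number (0..63) of an RSE backing-store address: bits 3..8. */
    static uint64_t rse_slot_num(uint64_t *addr)
    {
        return ((uint64_t)addr >> 3) & 0x3f;
    }

    /* Number of actual registers between bspstore and bsp, excluding the
     * NaT-collection slots that occupy every 64th position. */
    static uint64_t rse_num_regs(uint64_t *bspstore, uint64_t *bsp)
    {
        uint64_t slots = bsp - bspstore;
        return slots - (rse_slot_num(bspstore) + slots) / 0x40;
    }

    /* Address num_regs registers past addr, skipping NaT-collection
     * slots; the "delta -= 0x3e" matches the "adds r18=-62,r18" below. */
    static uint64_t *rse_skip_regs(uint64_t *addr, long num_regs)
    {
        long delta = rse_slot_num(addr) + num_regs;
        if (num_regs < 0)
            delta -= 0x3e;
        return addr + num_regs + delta / 0x3f;
    }

With these, the formula reads: back up from bsp1 by the dirty-partition size, count how many real registers that spans, and step bsp0 back by the same count to recover bspstore0.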
- // - alloc r2=ar.pfs,0,0,0,0 // alloc null frame - adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp - adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp - ;; - ld8 r17=[r16] - ld8 r16=[r18] // get new rnat - extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) - ;; - mov ar.rsc=r17 // put RSE into enforced lazy mode - shr.u r17=r17,16 - ;; - sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) - shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) - ;; - loadrs // restore dirty partition - extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) - ;; - add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) - ;; - shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 - ;; - sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) - movl r17=0x8208208208208209 - ;; - add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) - setf.sig f7=r17 - cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? - ;; -(p7) adds r18=-62,r18 // delta -= 62 - ;; - setf.sig f6=r18 - ;; - xmpy.h f6=f6,f7 - ;; - getf.sig r17=f6 - ;; - add r17=r17,r18 - shr r18=r18,63 - ;; - shr r17=r17,5 - ;; - sub r17=r17,r18 // r17 = delta/63 - ;; - add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) - ;; - shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) - ;; - mov ar.bspstore=r15 // switch back to old register backing store area - ;; - mov ar.rnat=r16 // restore RNaT - mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) - // invala not necessary as that will happen when returning to user-mode - br.cond.sptk back_from_restore_rbs -END(__kernel_sigtramp) - -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; - RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 - MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) - ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - nop.i 0 - ;; - SSM_PSR_I(p8, p14, r25) -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. 
- */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif - SSM_PSR_I(p0, p14, r10) - mov r10=-1 -(p10) mov r8=EINVAL -(p9) mov r8=ENOSYS - FSYS_RETURN - -END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S deleted file mode 100644 index 461c7e69d46565771b58dc2aa0070f36b629276f..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/gate.lds.S +++ /dev/null @@ -1,108 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Linker script for gate DSO. The gate pages are an ELF shared object - * prelinked to its virtual address, with only one read-only segment and - * one execute-only segment (both fit in one page). This script controls - * its layout. - */ - -#include - -SECTIONS -{ - . = GATE_ADDR + SIZEOF_HEADERS; - - .hash : { *(.hash) } :readable - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note*) } :readable :note - - .dynamic : { *(.dynamic) } :readable :dynamic - - /* - * This linker script is used both with -r and with -shared. For - * the layouts to match, we need to skip more than enough space for - * the dynamic symbol table et al. If this amount is insufficient, - * ld -shared will barf. Just increase it here. - */ - . = GATE_ADDR + 0x600; - - .data..patch : { - __start_gate_mckinley_e9_patchlist = .; - *(.data..patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; - - __start_gate_vtop_patchlist = .; - *(.data..patch.vtop) - __end_gate_vtop_patchlist = .; - - __start_gate_fsyscall_patchlist = .; - *(.data..patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; - - __start_gate_brl_fsys_bubble_down_patchlist = .; - *(.data..patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; - } :readable - - .IA_64.unwind_info : { *(.IA_64.unwind_info*) } - .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind -#ifdef HAVE_BUGGY_SEGREL - .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable -#else - . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); - .text : { *(.text) *(.text.*) } :epc -#endif - - /DISCARD/ : { - *(.got.plt) *(.got) - *(.data .data.* .gnu.linkonce.d.*) - *(.dynbss) - *(.bss .bss.* .gnu.linkonce.b.*) - *(__ex_table) - *(__mca_table) - } -} - -/* - * ld does not recognize this name token; use the constant. - */ -#define PT_IA_64_UNWIND 0x70000001 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ -#ifndef HAVE_BUGGY_SEGREL - epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ -#endif - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - unwind PT_IA_64_UNWIND; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.5 { - global: - __kernel_syscall_via_break; - __kernel_syscall_via_epc; - __kernel_sigtramp; - - local: *; - }; -} - -/* The ELF entry point can be used to set the AT_SYSINFO value. 
*/ -ENTRY(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S deleted file mode 100644 index e6f45170a4b97a80d9f63f4eeace13d0b9e2f64b..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/head.S +++ /dev/null @@ -1,1173 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Here is where the ball gets rolling as far as the kernel is concerned. - * When control is transferred to _start, the bootload has already - * loaded us to the correct address. All that's left to do here is - * to set up the kernel's global pointer and jump to the kernel - * entry point. - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * Stephane Eranian - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick - * Copyright (C) 1999 Don Dugger - * Copyright (C) 2002 Fenghua Yu - * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. - * Copyright (C) 2004 Ashok Raj - * Support for CPU Hotplug - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define SAL_PSR_BITS_TO_SET \ - (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) - -#define SAVE_FROM_REG(src, ptr, dest) \ - mov dest=src;; \ - st8 [ptr]=dest,0x08 - -#define RESTORE_REG(reg, ptr, _tmp) \ - ld8 _tmp=[ptr],0x08;; \ - mov reg=_tmp - -#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -1: \ - SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ - add _idx=1,_idx;; \ - br.cloop.sptk.many 1b - -#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ - mov ar.lc=IA64_NUM_DBG_REGS-1;; \ - mov _idx=0;; \ -_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ - add _idx=1, _idx;; \ - br.cloop.sptk.many _lbl - -#define SAVE_ONE_RR(num, _reg, _tmp) \ - movl _tmp=(num<<61);; \ - mov _reg=rr[_tmp] - -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - SAVE_ONE_RR(0,_r0, _tmp);; \ - SAVE_ONE_RR(1,_r1, _tmp);; \ - SAVE_ONE_RR(2,_r2, _tmp);; \ - SAVE_ONE_RR(3,_r3, _tmp);; \ - SAVE_ONE_RR(4,_r4, _tmp);; \ - SAVE_ONE_RR(5,_r5, _tmp);; \ - SAVE_ONE_RR(6,_r6, _tmp);; \ - SAVE_ONE_RR(7,_r7, _tmp);; - -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ - st8 [ptr]=_r0, 8;; \ - st8 [ptr]=_r1, 8;; \ - st8 [ptr]=_r2, 8;; \ - st8 [ptr]=_r3, 8;; \ - st8 [ptr]=_r4, 8;; \ - st8 [ptr]=_r5, 8;; \ - st8 [ptr]=_r6, 8;; \ - st8 [ptr]=_r7, 8;; - -#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ - mov ar.lc=0x08-1;; \ - movl _idx1=0x00;; \ -RestRR: \ - dep.z _idx2=_idx1,61,3;; \ - ld8 _tmp=[ptr],8;; \ - mov rr[_idx2]=_tmp;; \ - srlz.d;; \ - add _idx1=1,_idx1;; \ - br.cloop.sptk.few RestRR - -#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ - movl reg1=sal_state_for_booting_cpu;; \ - ld8 reg2=[reg1];; - -/* - * Adjust region registers saved before starting to save - * break regs and rest of the states that need to be preserved. 
- */ -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ - SAVE_FROM_REG(b0,_reg1,_reg2);; \ - SAVE_FROM_REG(b1,_reg1,_reg2);; \ - SAVE_FROM_REG(b2,_reg1,_reg2);; \ - SAVE_FROM_REG(b3,_reg1,_reg2);; \ - SAVE_FROM_REG(b4,_reg1,_reg2);; \ - SAVE_FROM_REG(b5,_reg1,_reg2);; \ - st8 [_reg1]=r1,0x08;; \ - st8 [_reg1]=r12,0x08;; \ - st8 [_reg1]=r13,0x08;; \ - SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ - SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ - SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ - st8 [_reg1]=r4,0x08;; \ - st8 [_reg1]=r5,0x08;; \ - st8 [_reg1]=r6,0x08;; \ - st8 [_reg1]=r7,0x08;; \ - st8 [_reg1]=_pred,0x08;; \ - SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ - stf.spill.nta [_reg1]=f2,16;; \ - stf.spill.nta [_reg1]=f3,16;; \ - stf.spill.nta [_reg1]=f4,16;; \ - stf.spill.nta [_reg1]=f5,16;; \ - stf.spill.nta [_reg1]=f16,16;; \ - stf.spill.nta [_reg1]=f17,16;; \ - stf.spill.nta [_reg1]=f18,16;; \ - stf.spill.nta [_reg1]=f19,16;; \ - stf.spill.nta [_reg1]=f20,16;; \ - stf.spill.nta [_reg1]=f21,16;; \ - stf.spill.nta [_reg1]=f22,16;; \ - stf.spill.nta [_reg1]=f23,16;; \ - stf.spill.nta [_reg1]=f24,16;; \ - stf.spill.nta [_reg1]=f25,16;; \ - stf.spill.nta [_reg1]=f26,16;; \ - stf.spill.nta [_reg1]=f27,16;; \ - stf.spill.nta [_reg1]=f28,16;; \ - stf.spill.nta [_reg1]=f29,16;; \ - stf.spill.nta [_reg1]=f30,16;; \ - stf.spill.nta [_reg1]=f31,16;; - -#else -#define SET_AREA_FOR_BOOTING_CPU(a1, a2) -#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) -#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) -#endif - -#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ - movl _tmp1=(num << 61);; \ - mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ - mov rr[_tmp1]=_tmp2 - - __PAGE_ALIGNED_DATA - - .global empty_zero_page -EXPORT_DATA_SYMBOL_GPL(empty_zero_page) -empty_zero_page: - .skip PAGE_SIZE - - .global swapper_pg_dir -swapper_pg_dir: - .skip PAGE_SIZE - - .rodata -halt_msg: - stringz "Halting kernel\n" - - __REF - - .global start_ap - - /* - * Start the kernel. When the bootloader passes control to _start(), r28 - * points to the address of the boot parameter area. Execution reaches - * here in physical mode. 
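The SET_ONE_RR macro above packs a region register value according to the architected layout: the VHPT-walker enable in bit 0, the log2 page size in bits 7..2, and the 24-bit region ID from bit 8 up (ia64_rid() derives the region ID from the kernel region ID and the region number). A small sketch of the encoding; make_rr is an illustrative name:

    typedef unsigned long u64;

    /* Architected region-register layout: bit 0 = ve (VHPT walker
     * enable), bits 7..2 = ps (log2 page size), bits 31..8 = rid. */
    static u64 make_rr(u64 rid, unsigned page_shift, int vhpt_enable)
    {
        return (rid << 8) | ((u64)page_shift << 2) | (vhpt_enable & 1);
    }

So the rr[6]/rr[7] setup in _start installs VHPT-disabled, IA64_GRANULE_SHIFT-sized policies for regions 6 and 7, exactly as the per-register comments there describe.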
- */ -GLOBAL_ENTRY(_start) -start_ap: - .prologue - .save rp, r0 // terminate unwind chain with a NULL rp - .body - - rsm psr.i | psr.ic - ;; - srlz.i - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - /* - * Save the region registers, predicate before they get clobbered - */ - SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); - mov r25=pr;; - - /* - * Initialize kernel region registers: - * rr[0]: VHPT enabled, page size = PAGE_SHIFT - * rr[1]: VHPT enabled, page size = PAGE_SHIFT - * rr[2]: VHPT enabled, page size = PAGE_SHIFT - * rr[3]: VHPT enabled, page size = PAGE_SHIFT - * rr[4]: VHPT enabled, page size = PAGE_SHIFT - * rr[5]: VHPT enabled, page size = PAGE_SHIFT - * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT - * We initialize all of them to prevent inadvertently assuming - * something about the state of address translation early in boot. - */ - SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; - SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; - SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; - /* - * Now pin mappings into the TLB for kernel text and data - */ - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - - /* - * Switch into virtual mode: - */ - movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI) - ;; - mov cr.ipsr=r16 - movl r17=1f - ;; - mov cr.iip=r17 - mov cr.ifs=r0 - ;; - rfi - ;; -1: // now we are in virtual mode - - SET_AREA_FOR_BOOTING_CPU(r2, r16); - - STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); - SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) - ;; - - // set IVT entry point---can't access I/O ports without it - movl r3=ia64_ivt - ;; - mov cr.iva=r3 - movl r2=FPSR_DEFAULT - ;; - srlz.i - movl gp=__gp - - mov ar.fpsr=r2 - ;; - -#define isAP p2 // are we an Application Processor? -#define isBP p3 // are we the Bootstrap Processor? - -#ifdef CONFIG_SMP - /* - * Find the init_task for the currently booting CPU. At poweron, and in - * UP mode, task_for_booting_cpu is NULL. - */ - movl r3=task_for_booting_cpu - ;; - ld8 r3=[r3] - movl r2=init_task - ;; - cmp.eq isBP,isAP=r3,r0 - ;; -(isAP) mov r2=r3 -#else - movl r2=init_task - cmp.eq isBP,isAP=r0,r0 -#endif - ;; - tpa r3=r2 // r3 == phys addr of task struct - mov r16=-1 -(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it - - // load mapping for stack (virtaddr in r2, physaddr in r3) - rsm psr.ic - movl r17=PAGE_KERNEL - ;; - srlz.d - dep r18=0,r3,0,12 - ;; - or r18=r17,r18 - dep r2=-1,r3,61,3 // IMVA of task - ;; - mov r17=rr[r2] - shr.u r16=r3,IA64_GRANULE_SHIFT - ;; - dep r17=0,r17,8,24 - ;; - mov cr.itir=r17 - mov cr.ifa=r2 - - mov r19=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r19]=r18 - ;; - ssm psr.ic - srlz.d - ;; - -.load_current: - // load the "current" pointer (r13) and ar.k6 with the current task - mov IA64_KR(CURRENT)=r2 // virtual address - mov IA64_KR(CURRENT_STACK)=r16 - mov r13=r2 - /* - * Reserve space at the top of the stack for "struct pt_regs". 
Kernel - * threads don't store interesting values in that structure, but the space - * still needs to be there because time-critical stuff such as the context - * switching can be implemented more efficiently (for example, __switch_to() - * always sets the psr.dfh bit of the task it is switching to). - */ - - addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 - addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE - mov ar.rsc=0 // place RSE in enforced lazy mode - ;; - loadrs // clear the dirty partition - movl r19=__phys_per_cpu_start - mov r18=PERCPU_PAGE_SIZE - ;; -#ifndef CONFIG_SMP - add r19=r19,r18 - ;; -#else -(isAP) br.few 2f - movl r20=__cpu0_per_cpu - ;; - shr.u r18=r18,3 -1: - ld8 r21=[r19],8;; - st8[r20]=r21,8 - adds r18=-1,r18;; - cmp4.lt p7,p6=0,r18 -(p7) br.cond.dptk.few 1b - mov r19=r20 - ;; -2: -#endif - tpa r19=r19 - ;; - .pred.rel.mutex isBP,isAP -(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0 -(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base - ;; - mov ar.bspstore=r2 // establish the new RSE stack - ;; - mov ar.rsc=0x3 // place RSE in eager mode - -(isBP) dep r28=-1,r28,61,3 // make address virtual -(isBP) movl r2=ia64_boot_param - ;; -(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader - -#ifdef CONFIG_SMP -(isAP) br.call.sptk.many rp=start_secondary -.ret0: -(isAP) br.cond.sptk self -#endif - - // This is executed by the bootstrap processor (bsp) only: - -#ifdef CONFIG_IA64_FW_EMU - // initialize PAL & SAL emulator: - br.call.sptk.many rp=sys_fw_init -.ret1: -#endif - br.call.sptk.many rp=start_kernel -.ret2: addl r3=@ltoff(halt_msg),gp - ;; - alloc r2=ar.pfs,8,0,2,0 - ;; - ld8 out0=[r3] - br.call.sptk.many b0=console_print - -self: hint @pause - br.sptk.many self // endless loop -END(_start) - - .text - -GLOBAL_ENTRY(ia64_save_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - mov r20=ar.lc // preserve ar.lc - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=0 - add r19=IA64_NUM_DBG_REGS*8,in0 - ;; -1: mov r16=dbr[r18] -#ifdef CONFIG_ITANIUM - ;; - srlz.d -#endif - mov r17=ibr[r18] - add r18=1,r18 - ;; - st8.nta [in0]=r16,8 - st8.nta [r19]=r17,8 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_save_debug_regs) - -GLOBAL_ENTRY(ia64_load_debug_regs) - alloc r16=ar.pfs,1,0,0,0 - lfetch.nta [in0] - mov r20=ar.lc // preserve ar.lc - add r19=IA64_NUM_DBG_REGS*8,in0 - mov ar.lc=IA64_NUM_DBG_REGS-1 - mov r18=-1 - ;; -1: ld8.nta r16=[in0],8 - ld8.nta r17=[r19],8 - add r18=1,r18 - ;; - mov dbr[r18]=r16 -#ifdef CONFIG_ITANIUM - ;; - srlz.d // Errata 132 (NoFix status) -#endif - mov ibr[r18]=r17 - br.cloop.sptk.many 1b - ;; - mov ar.lc=r20 // restore ar.lc - br.ret.sptk.many rp -END(ia64_load_debug_regs) - -GLOBAL_ENTRY(__ia64_save_fpu) - alloc r2=ar.pfs,1,4,0,0 - adds loc0=96*16-16,in0 - adds loc1=96*16-16-128,in0 - ;; - stf.spill.nta [loc0]=f127,-256 - stf.spill.nta [loc1]=f119,-256 - ;; - stf.spill.nta [loc0]=f111,-256 - stf.spill.nta [loc1]=f103,-256 - ;; - stf.spill.nta [loc0]=f95,-256 - stf.spill.nta [loc1]=f87,-256 - ;; - stf.spill.nta [loc0]=f79,-256 - stf.spill.nta [loc1]=f71,-256 - ;; - stf.spill.nta [loc0]=f63,-256 - stf.spill.nta [loc1]=f55,-256 - adds loc2=96*16-32,in0 - ;; - stf.spill.nta [loc0]=f47,-256 - stf.spill.nta [loc1]=f39,-256 - adds loc3=96*16-32-128,in0 - ;; - stf.spill.nta [loc2]=f126,-256 - stf.spill.nta [loc3]=f118,-256 - ;; - stf.spill.nta [loc2]=f110,-256 - stf.spill.nta [loc3]=f102,-256 - ;; - stf.spill.nta [loc2]=f94,-256 - stf.spill.nta [loc3]=f86,-256 - ;; - 
stf.spill.nta [loc2]=f78,-256 - stf.spill.nta [loc3]=f70,-256 - ;; - stf.spill.nta [loc2]=f62,-256 - stf.spill.nta [loc3]=f54,-256 - adds loc0=96*16-48,in0 - ;; - stf.spill.nta [loc2]=f46,-256 - stf.spill.nta [loc3]=f38,-256 - adds loc1=96*16-48-128,in0 - ;; - stf.spill.nta [loc0]=f125,-256 - stf.spill.nta [loc1]=f117,-256 - ;; - stf.spill.nta [loc0]=f109,-256 - stf.spill.nta [loc1]=f101,-256 - ;; - stf.spill.nta [loc0]=f93,-256 - stf.spill.nta [loc1]=f85,-256 - ;; - stf.spill.nta [loc0]=f77,-256 - stf.spill.nta [loc1]=f69,-256 - ;; - stf.spill.nta [loc0]=f61,-256 - stf.spill.nta [loc1]=f53,-256 - adds loc2=96*16-64,in0 - ;; - stf.spill.nta [loc0]=f45,-256 - stf.spill.nta [loc1]=f37,-256 - adds loc3=96*16-64-128,in0 - ;; - stf.spill.nta [loc2]=f124,-256 - stf.spill.nta [loc3]=f116,-256 - ;; - stf.spill.nta [loc2]=f108,-256 - stf.spill.nta [loc3]=f100,-256 - ;; - stf.spill.nta [loc2]=f92,-256 - stf.spill.nta [loc3]=f84,-256 - ;; - stf.spill.nta [loc2]=f76,-256 - stf.spill.nta [loc3]=f68,-256 - ;; - stf.spill.nta [loc2]=f60,-256 - stf.spill.nta [loc3]=f52,-256 - adds loc0=96*16-80,in0 - ;; - stf.spill.nta [loc2]=f44,-256 - stf.spill.nta [loc3]=f36,-256 - adds loc1=96*16-80-128,in0 - ;; - stf.spill.nta [loc0]=f123,-256 - stf.spill.nta [loc1]=f115,-256 - ;; - stf.spill.nta [loc0]=f107,-256 - stf.spill.nta [loc1]=f99,-256 - ;; - stf.spill.nta [loc0]=f91,-256 - stf.spill.nta [loc1]=f83,-256 - ;; - stf.spill.nta [loc0]=f75,-256 - stf.spill.nta [loc1]=f67,-256 - ;; - stf.spill.nta [loc0]=f59,-256 - stf.spill.nta [loc1]=f51,-256 - adds loc2=96*16-96,in0 - ;; - stf.spill.nta [loc0]=f43,-256 - stf.spill.nta [loc1]=f35,-256 - adds loc3=96*16-96-128,in0 - ;; - stf.spill.nta [loc2]=f122,-256 - stf.spill.nta [loc3]=f114,-256 - ;; - stf.spill.nta [loc2]=f106,-256 - stf.spill.nta [loc3]=f98,-256 - ;; - stf.spill.nta [loc2]=f90,-256 - stf.spill.nta [loc3]=f82,-256 - ;; - stf.spill.nta [loc2]=f74,-256 - stf.spill.nta [loc3]=f66,-256 - ;; - stf.spill.nta [loc2]=f58,-256 - stf.spill.nta [loc3]=f50,-256 - adds loc0=96*16-112,in0 - ;; - stf.spill.nta [loc2]=f42,-256 - stf.spill.nta [loc3]=f34,-256 - adds loc1=96*16-112-128,in0 - ;; - stf.spill.nta [loc0]=f121,-256 - stf.spill.nta [loc1]=f113,-256 - ;; - stf.spill.nta [loc0]=f105,-256 - stf.spill.nta [loc1]=f97,-256 - ;; - stf.spill.nta [loc0]=f89,-256 - stf.spill.nta [loc1]=f81,-256 - ;; - stf.spill.nta [loc0]=f73,-256 - stf.spill.nta [loc1]=f65,-256 - ;; - stf.spill.nta [loc0]=f57,-256 - stf.spill.nta [loc1]=f49,-256 - adds loc2=96*16-128,in0 - ;; - stf.spill.nta [loc0]=f41,-256 - stf.spill.nta [loc1]=f33,-256 - adds loc3=96*16-128-128,in0 - ;; - stf.spill.nta [loc2]=f120,-256 - stf.spill.nta [loc3]=f112,-256 - ;; - stf.spill.nta [loc2]=f104,-256 - stf.spill.nta [loc3]=f96,-256 - ;; - stf.spill.nta [loc2]=f88,-256 - stf.spill.nta [loc3]=f80,-256 - ;; - stf.spill.nta [loc2]=f72,-256 - stf.spill.nta [loc3]=f64,-256 - ;; - stf.spill.nta [loc2]=f56,-256 - stf.spill.nta [loc3]=f48,-256 - ;; - stf.spill.nta [loc2]=f40 - stf.spill.nta [loc3]=f32 - br.ret.sptk.many rp -END(__ia64_save_fpu) - -GLOBAL_ENTRY(__ia64_load_fpu) - alloc r2=ar.pfs,1,2,0,0 - adds r3=128,in0 - adds r14=256,in0 - adds r15=384,in0 - mov loc0=512 - mov loc1=-1024+16 - ;; - ldf.fill.nta f32=[in0],loc0 - ldf.fill.nta f40=[ r3],loc0 - ldf.fill.nta f48=[r14],loc0 - ldf.fill.nta f56=[r15],loc0 - ;; - ldf.fill.nta f64=[in0],loc0 - ldf.fill.nta f72=[ r3],loc0 - ldf.fill.nta f80=[r14],loc0 - ldf.fill.nta f88=[r15],loc0 - ;; - ldf.fill.nta f96=[in0],loc1 - ldf.fill.nta f104=[ r3],loc1 - ldf.fill.nta 
f112=[r14],loc1 - ldf.fill.nta f120=[r15],loc1 - ;; - ldf.fill.nta f33=[in0],loc0 - ldf.fill.nta f41=[ r3],loc0 - ldf.fill.nta f49=[r14],loc0 - ldf.fill.nta f57=[r15],loc0 - ;; - ldf.fill.nta f65=[in0],loc0 - ldf.fill.nta f73=[ r3],loc0 - ldf.fill.nta f81=[r14],loc0 - ldf.fill.nta f89=[r15],loc0 - ;; - ldf.fill.nta f97=[in0],loc1 - ldf.fill.nta f105=[ r3],loc1 - ldf.fill.nta f113=[r14],loc1 - ldf.fill.nta f121=[r15],loc1 - ;; - ldf.fill.nta f34=[in0],loc0 - ldf.fill.nta f42=[ r3],loc0 - ldf.fill.nta f50=[r14],loc0 - ldf.fill.nta f58=[r15],loc0 - ;; - ldf.fill.nta f66=[in0],loc0 - ldf.fill.nta f74=[ r3],loc0 - ldf.fill.nta f82=[r14],loc0 - ldf.fill.nta f90=[r15],loc0 - ;; - ldf.fill.nta f98=[in0],loc1 - ldf.fill.nta f106=[ r3],loc1 - ldf.fill.nta f114=[r14],loc1 - ldf.fill.nta f122=[r15],loc1 - ;; - ldf.fill.nta f35=[in0],loc0 - ldf.fill.nta f43=[ r3],loc0 - ldf.fill.nta f51=[r14],loc0 - ldf.fill.nta f59=[r15],loc0 - ;; - ldf.fill.nta f67=[in0],loc0 - ldf.fill.nta f75=[ r3],loc0 - ldf.fill.nta f83=[r14],loc0 - ldf.fill.nta f91=[r15],loc0 - ;; - ldf.fill.nta f99=[in0],loc1 - ldf.fill.nta f107=[ r3],loc1 - ldf.fill.nta f115=[r14],loc1 - ldf.fill.nta f123=[r15],loc1 - ;; - ldf.fill.nta f36=[in0],loc0 - ldf.fill.nta f44=[ r3],loc0 - ldf.fill.nta f52=[r14],loc0 - ldf.fill.nta f60=[r15],loc0 - ;; - ldf.fill.nta f68=[in0],loc0 - ldf.fill.nta f76=[ r3],loc0 - ldf.fill.nta f84=[r14],loc0 - ldf.fill.nta f92=[r15],loc0 - ;; - ldf.fill.nta f100=[in0],loc1 - ldf.fill.nta f108=[ r3],loc1 - ldf.fill.nta f116=[r14],loc1 - ldf.fill.nta f124=[r15],loc1 - ;; - ldf.fill.nta f37=[in0],loc0 - ldf.fill.nta f45=[ r3],loc0 - ldf.fill.nta f53=[r14],loc0 - ldf.fill.nta f61=[r15],loc0 - ;; - ldf.fill.nta f69=[in0],loc0 - ldf.fill.nta f77=[ r3],loc0 - ldf.fill.nta f85=[r14],loc0 - ldf.fill.nta f93=[r15],loc0 - ;; - ldf.fill.nta f101=[in0],loc1 - ldf.fill.nta f109=[ r3],loc1 - ldf.fill.nta f117=[r14],loc1 - ldf.fill.nta f125=[r15],loc1 - ;; - ldf.fill.nta f38 =[in0],loc0 - ldf.fill.nta f46 =[ r3],loc0 - ldf.fill.nta f54 =[r14],loc0 - ldf.fill.nta f62 =[r15],loc0 - ;; - ldf.fill.nta f70 =[in0],loc0 - ldf.fill.nta f78 =[ r3],loc0 - ldf.fill.nta f86 =[r14],loc0 - ldf.fill.nta f94 =[r15],loc0 - ;; - ldf.fill.nta f102=[in0],loc1 - ldf.fill.nta f110=[ r3],loc1 - ldf.fill.nta f118=[r14],loc1 - ldf.fill.nta f126=[r15],loc1 - ;; - ldf.fill.nta f39 =[in0],loc0 - ldf.fill.nta f47 =[ r3],loc0 - ldf.fill.nta f55 =[r14],loc0 - ldf.fill.nta f63 =[r15],loc0 - ;; - ldf.fill.nta f71 =[in0],loc0 - ldf.fill.nta f79 =[ r3],loc0 - ldf.fill.nta f87 =[r14],loc0 - ldf.fill.nta f95 =[r15],loc0 - ;; - ldf.fill.nta f103=[in0] - ldf.fill.nta f111=[ r3] - ldf.fill.nta f119=[r14] - ldf.fill.nta f127=[r15] - br.ret.sptk.many rp -END(__ia64_load_fpu) - -GLOBAL_ENTRY(__ia64_init_fpu) - stf.spill [sp]=f0 // M3 - mov f32=f0 // F - nop.b 0 - - ldfps f33,f34=[sp] // M0 - ldfps f35,f36=[sp] // M1 - mov f37=f0 // F - ;; - - setf.s f38=r0 // M2 - setf.s f39=r0 // M3 - mov f40=f0 // F - - ldfps f41,f42=[sp] // M0 - ldfps f43,f44=[sp] // M1 - mov f45=f0 // F - - setf.s f46=r0 // M2 - setf.s f47=r0 // M3 - mov f48=f0 // F - - ldfps f49,f50=[sp] // M0 - ldfps f51,f52=[sp] // M1 - mov f53=f0 // F - - setf.s f54=r0 // M2 - setf.s f55=r0 // M3 - mov f56=f0 // F - - ldfps f57,f58=[sp] // M0 - ldfps f59,f60=[sp] // M1 - mov f61=f0 // F - - setf.s f62=r0 // M2 - setf.s f63=r0 // M3 - mov f64=f0 // F - - ldfps f65,f66=[sp] // M0 - ldfps f67,f68=[sp] // M1 - mov f69=f0 // F - - setf.s f70=r0 // M2 - setf.s f71=r0 // M3 - mov f72=f0 // F - - ldfps f73,f74=[sp] // M0 - 
ldfps f75,f76=[sp] // M1 - mov f77=f0 // F - - setf.s f78=r0 // M2 - setf.s f79=r0 // M3 - mov f80=f0 // F - - ldfps f81,f82=[sp] // M0 - ldfps f83,f84=[sp] // M1 - mov f85=f0 // F - - setf.s f86=r0 // M2 - setf.s f87=r0 // M3 - mov f88=f0 // F - - /* - * When the instructions are cached, it would be faster to initialize - * the remaining registers with simply mov instructions (F-unit). - * This gets the time down to ~29 cycles. However, this would use up - * 33 bundles, whereas continuing with the above pattern yields - * 10 bundles and ~30 cycles. - */ - - ldfps f89,f90=[sp] // M0 - ldfps f91,f92=[sp] // M1 - mov f93=f0 // F - - setf.s f94=r0 // M2 - setf.s f95=r0 // M3 - mov f96=f0 // F - - ldfps f97,f98=[sp] // M0 - ldfps f99,f100=[sp] // M1 - mov f101=f0 // F - - setf.s f102=r0 // M2 - setf.s f103=r0 // M3 - mov f104=f0 // F - - ldfps f105,f106=[sp] // M0 - ldfps f107,f108=[sp] // M1 - mov f109=f0 // F - - setf.s f110=r0 // M2 - setf.s f111=r0 // M3 - mov f112=f0 // F - - ldfps f113,f114=[sp] // M0 - ldfps f115,f116=[sp] // M1 - mov f117=f0 // F - - setf.s f118=r0 // M2 - setf.s f119=r0 // M3 - mov f120=f0 // F - - ldfps f121,f122=[sp] // M0 - ldfps f123,f124=[sp] // M1 - mov f125=f0 // F - - setf.s f126=r0 // M2 - setf.s f127=r0 // M3 - br.ret.sptk.many rp // F -END(__ia64_init_fpu) - -/* - * Switch execution mode from virtual to physical - * - * Inputs: - * r16 = new psr to establish - * Output: - * r19 = old virtual address of ar.bsp - * r20 = old virtual address of sp - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_phys) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_phys,r15 - - mov r19=ar.bsp - mov r20=sp - mov r14=rp // get return address into a general register - ;; - - // going to physical mode, use tpa to translate virt->phys - tpa r17=r19 - tpa r3=r3 - tpa sp=sp - tpa r14=r14 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r17 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_phys) - -/* - * Switch execution mode from physical to virtual - * - * Inputs: - * r16 = new psr to establish - * r19 = new bspstore to establish - * r20 = new sp to establish - * - * Note: RSE must already be in enforced lazy mode - */ -GLOBAL_ENTRY(ia64_switch_mode_virt) - { - rsm psr.i | psr.ic // disable interrupts and interrupt collection - mov r15=ip - } - ;; - { - flushrs // must be first insn in group - srlz.i - } - ;; - mov cr.ipsr=r16 // set new PSR - add r3=1f-ia64_switch_mode_virt,r15 - - mov r14=rp // get return address into a general register - ;; - - // going to virtual - // - for code addresses, set upper bits of addr to KERNEL_START - // - for stack addresses, copy from input argument - movl r18=KERNEL_START - dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT - mov sp=r20 - ;; - or r3=r3,r18 - or r14=r14,r18 - ;; - - mov r18=ar.rnat // save ar.rnat - mov ar.bspstore=r19 // this steps on ar.rnat - mov cr.iip=r3 - mov cr.ifs=r0 - ;; - mov ar.rnat=r18 // restore ar.rnat - rfi // must be last insn in group - ;; -1: mov rp=r14 - br.ret.sptk.many rp -END(ia64_switch_mode_virt) - -GLOBAL_ENTRY(ia64_delay_loop) - .prologue -{ nop 0 // work around GAS unwind 
info generation bug... - .save ar.lc,r2 - mov r2=ar.lc - .body - ;; - mov ar.lc=r32 -} - ;; - // force loop to be 32-byte aligned (GAS bug means we cannot use .align - // inside function body without corrupting unwind info). -{ nop 0 } -1: br.cloop.sptk.few 1b - ;; - mov ar.lc=r2 - br.ret.sptk.many rp -END(ia64_delay_loop) - -/* - * Return a CPU-local timestamp in nano-seconds. This timestamp is - * NOT synchronized across CPUs its return value must never be - * compared against the values returned on another CPU. The usage in - * kernel/sched/core.c ensures that. - * - * The return-value of sched_clock() is NOT supposed to wrap-around. - * If it did, it would cause some scheduling hiccups (at the worst). - * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even - * that would happen only once every 5+ years. - * - * The code below basically calculates: - * - * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT - * - * except that the multiplication and the shift are done with 128-bit - * intermediate precision so that we can produce a full 64-bit result. - */ -GLOBAL_ENTRY(ia64_native_sched_clock) - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - mov.m r9=ar.itc // fetch cycle-counter (35 cyc) - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(ia64_native_sched_clock) - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -GLOBAL_ENTRY(cycle_to_nsec) - alloc r16=ar.pfs,1,0,0,0 - addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 - ;; - ldf8 f8=[r8] - ;; - setf.sig f9=r32 - ;; - xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) - xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product - ;; - getf.sig r8=f10 // (5 cyc) - getf.sig r9=f11 - ;; - shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT - br.ret.sptk.many rp -END(cycle_to_nsec) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - -#ifdef CONFIG_IA64_BRL_EMU - -/* - * Assembly routines used by brl_emu.c to set preserved register state. 
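In ia64_native_sched_clock and cycle_to_nsec above, the xmpy.lu/xmpy.hu pair forms the full 128-bit product of the cycle count and nsec_per_cyc, and shrp extracts the 64-bit result window from it. The same computation in C, using GCC's unsigned __int128 (nsec_per_cyc is the per-CPU fixed-point scale factor; the function name and the shift value of 30 are illustrative stand-ins for the kernel's IA64_NSEC_PER_CYC_SHIFT):

    #include <stdint.h>

    /* Fixed-point fraction bits of nsec_per_cyc; 30 is illustrative. */
    #define NSEC_PER_CYC_SHIFT 30

    static uint64_t cycles_to_ns(uint64_t cycles, uint64_t nsec_per_cyc)
    {
        /* xmpy.lu/xmpy.hu: low and high halves of the 128-bit product;
         * shrp: take bits [SHIFT+63 .. SHIFT] of that product. */
        unsigned __int128 prod = (unsigned __int128)cycles * nsec_per_cyc;
        return (uint64_t)(prod >> NSEC_PER_CYC_SHIFT);
    }

The 128-bit intermediate is what keeps the full 64-bit result exact, which a plain 64-bit multiply-then-shift could not do.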
- */ - -#define SET_REG(reg) \ - GLOBAL_ENTRY(ia64_set_##reg); \ - alloc r16=ar.pfs,1,0,0,0; \ - mov reg=r32; \ - ;; \ - br.ret.sptk.many rp; \ - END(ia64_set_##reg) - -SET_REG(b1); -SET_REG(b2); -SET_REG(b3); -SET_REG(b4); -SET_REG(b5); - -#endif /* CONFIG_IA64_BRL_EMU */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_HOTPLUG_CPU -GLOBAL_ENTRY(ia64_jump_to_sal) - alloc r16=ar.pfs,1,0,0,0;; - rsm psr.i | psr.ic -{ - flushrs - srlz.i -} - tpa r25=in0 - movl r18=tlb_purge_done;; - DATA_VA_TO_PA(r18);; - mov b1=r18 // Return location - movl r18=ia64_do_tlb_purge;; - DATA_VA_TO_PA(r18);; - mov b2=r18 // doing tlb_flush work - mov ar.rsc=0 // Put RSE in enforced lazy, LE mode - movl r17=1f;; - DATA_VA_TO_PA(r17);; - mov cr.iip=r17 - movl r16=SAL_PSR_BITS_TO_SET;; - mov cr.ipsr=r16 - mov cr.ifs=r0;; - rfi;; // note: this unmask MCA/INIT (psr.mc) -1: - /* - * Invalidate all TLB data/inst - */ - br.sptk.many b2;; // jump to tlb purge code - -tlb_purge_done: - RESTORE_REGION_REGS(r25, r17,r18,r19);; - RESTORE_REG(b0, r25, r17);; - RESTORE_REG(b1, r25, r17);; - RESTORE_REG(b2, r25, r17);; - RESTORE_REG(b3, r25, r17);; - RESTORE_REG(b4, r25, r17);; - RESTORE_REG(b5, r25, r17);; - ld8 r1=[r25],0x08;; - ld8 r12=[r25],0x08;; - ld8 r13=[r25],0x08;; - RESTORE_REG(ar.fpsr, r25, r17);; - RESTORE_REG(ar.pfs, r25, r17);; - RESTORE_REG(ar.rnat, r25, r17);; - RESTORE_REG(ar.unat, r25, r17);; - RESTORE_REG(ar.bspstore, r25, r17);; - RESTORE_REG(cr.dcr, r25, r17);; - RESTORE_REG(cr.iva, r25, r17);; - RESTORE_REG(cr.pta, r25, r17);; - srlz.d;; // required not to violate RAW dependency - RESTORE_REG(cr.itv, r25, r17);; - RESTORE_REG(cr.pmv, r25, r17);; - RESTORE_REG(cr.cmcv, r25, r17);; - RESTORE_REG(cr.lrr0, r25, r17);; - RESTORE_REG(cr.lrr1, r25, r17);; - ld8 r4=[r25],0x08;; - ld8 r5=[r25],0x08;; - ld8 r6=[r25],0x08;; - ld8 r7=[r25],0x08;; - ld8 r17=[r25],0x08;; - mov pr=r17,-1;; - RESTORE_REG(ar.lc, r25, r17);; - /* - * Now Restore floating point regs - */ - ldf.fill.nta f2=[r25],16;; - ldf.fill.nta f3=[r25],16;; - ldf.fill.nta f4=[r25],16;; - ldf.fill.nta f5=[r25],16;; - ldf.fill.nta f16=[r25],16;; - ldf.fill.nta f17=[r25],16;; - ldf.fill.nta f18=[r25],16;; - ldf.fill.nta f19=[r25],16;; - ldf.fill.nta f20=[r25],16;; - ldf.fill.nta f21=[r25],16;; - ldf.fill.nta f22=[r25],16;; - ldf.fill.nta f23=[r25],16;; - ldf.fill.nta f24=[r25],16;; - ldf.fill.nta f25=[r25],16;; - ldf.fill.nta f26=[r25],16;; - ldf.fill.nta f27=[r25],16;; - ldf.fill.nta f28=[r25],16;; - ldf.fill.nta f29=[r25],16;; - ldf.fill.nta f30=[r25],16;; - ldf.fill.nta f31=[r25],16;; - - /* - * Now that we have done all the register restores - * we are now ready for the big DIVE to SAL Land - */ - ssm psr.ic;; - srlz.d;; - br.ret.sptk.many b0;; -END(ia64_jump_to_sal) -#endif /* CONFIG_HOTPLUG_CPU */ - -#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S deleted file mode 100644 index 1efcbe5f0c78183f7571a58a5c0fe98767a04d34..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/ivt.S +++ /dev/null @@ -1,1689 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/ia64/kernel/ivt.S - * - * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick - * Suresh Siddha - * Kenneth Chen - * Fenghua Yu - * - * 00/08/23 Asit Mallick TLB handling for SMP - * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. 
- * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer - * Xen paravirtualization - * Copyright (c) 2008 Isaku Yamahata - * VA Linux Systems Japan K.K. - * pv_ops. - * Yaozu (Eddie) Dong - */ -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if 0 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define DBG_FAULT(i) -#endif - -#include "minstate.h" - -#define FAULT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_to_fault_handler - - .section .text..ivt,"ax" - - .align 32768 // align on 32KB boundary - .global ia64_ivt - EXPORT_DATA_SYMBOL(ia64_ivt) -ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vhpt_miss) - DBG_FAULT(0) - /* - * The VHPT vector is invoked when the TLB entry for the virtual page table - * is missing. This happens only as a result of a previous - * (the "original") TLB miss, which may either be caused by an instruction - * fetch or a data access (or non-access). - * - * What we do here is normal TLB miss handing for the _original_ miss, - * followed by inserting the TLB entry for the virtual page table page - * that the VHPT walker was attempting to access. The latter gets - * inserted as long as page table entry above pte level have valid - * mappings for the faulting address. The TLB entry for the original - * miss gets inserted only if the pte entry indicates that the page is - * present. 
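The offsets quoted in the per-entry comments follow directly from the layout described above: the first 20 entries are 64 bundles of 16 bytes (0x400 bytes each) and the remaining 48 are 16 bundles (0x100 bytes each), which also confirms the 32KB total. A small self-check of that arithmetic:

    #include <assert.h>

    /* Byte offset of IVT entry n: the first 20 entries are 64 bundles
     * (0x400 bytes) each, the remaining 48 are 16 bundles (0x100). */
    static unsigned ivt_entry_offset(unsigned n)
    {
        return n < 20 ? n * 0x400 : 20 * 0x400 + (n - 20) * 0x100;
    }

    int main(void)
    {
        assert(ivt_entry_offset(0) == 0x0000);          /* vhpt_miss  */
        assert(ivt_entry_offset(1) == 0x0400);          /* itlb_miss  */
        assert(ivt_entry_offset(7) == 0x1c00);          /* the header
                                                           comment's example */
        assert(ivt_entry_offset(67) + 0x100 == 0x8000); /* 32KB table  */
        return 0;
    }

This matches the .org ia64_ivt+0x400 and +0x0800 directives at itlb_miss and dtlb_miss below.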
- * - * do_page_fault gets invoked in the following cases: - * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no valid page table mapping - */ - MOV_FROM_IFA(r16) // get address that caused the TLB miss -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - MOV_FROM_ITIR(r25) -#endif - ;; - RSM_PSR_DT // use physical addressing for data - mov r31=pr // save the predicate registers - mov r19=IA64_KR(PT_BASE) // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - shr.u r22=r21,3 -#ifdef CONFIG_HUGETLB_PAGE - extr.u r26=r25,2,6 - ;; - cmp.ne p8,p0=r18,r26 - sub r27=r26,r18 - ;; -(p8) dep r25=r18,r25,2,6 -(p8) shr r22=r22,r27 -#endif - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r28=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? -#if CONFIG_PGTABLE_LEVELS == 4 - dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) - ;; - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -(p7) ld8 r29=[r28] // get *pud (may be 0) - ;; -(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) -#else - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) -#endif - ;; -(p7) ld8 r20=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) - ;; -(p7) ld8 r18=[r21] // read *pte - MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? - MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss - ;; // avoid RAW on p7 -(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? - dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address - ;; - ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and - // insert the data TLB entry -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - MOV_TO_IFA(r22, r24) - -#ifdef CONFIG_HUGETLB_PAGE - MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT -#endif - - /* - * Now compute and insert the TLB entry for the virtual page table. We never - * execute in a page table page so there is no need to set the exception deferral - * bit. - */ - adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 - ;; - ITC_D(p7, r24, r25) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - /* - * Re-check pagetable entry. 
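[Editor's note] The re-check just mentioned guards against a remote ptc.g purge racing the local itc, as the comment continues to explain below. A compact C analogue of the insert-then-re-check pattern, with stand-in stubs for the itc/ptc.l operations (a sketch of the idea, not the real TLB interface):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static void tlb_insert(uint64_t pte)          { printf("itc   %#llx\n", (unsigned long long)pte); }
static void tlb_purge(uint64_t va, int shift) { printf("ptc.l %#llx,%d\n", (unsigned long long)va, shift); }

static void install_pte(_Atomic uint64_t *ptep, uint64_t va, int page_shift)
{
    uint64_t seen = atomic_load(ptep);
    tlb_insert(seen);                  /* corresponds to ITC_D/ITC_I above */
    if (atomic_load(ptep) != seen)     /* raced with a remote ptc.g? */
        tlb_purge(va, page_shift);     /* drop the possibly stale translation */
}

int main(void)
{
    _Atomic uint64_t pte = 0x1000 | 1;
    install_pte(&pte, 0x2000000000004000ULL, 14);
    return 0;
}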
If they changed, we may have received a ptc.g - * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. At this point, we have: - * - * r28 = equivalent of pud_offset(pgd, ifa) - * r17 = equivalent of pmd_offset(pud, ifa) - * r21 = equivalent of pte_offset(pmd, ifa) - * - * r29 = *pud - * r20 = *pmd - * r18 = *pte - */ - ld8 r25=[r21] // read *pte again - ld8 r26=[r17] // read *pmd again -#if CONFIG_PGTABLE_LEVELS == 4 - ld8 r19=[r28] // read *pud again -#endif - cmp.ne p6,p7=r0,r0 - ;; - cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change -#if CONFIG_PGTABLE_LEVELS == 4 - cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change -#endif - mov r27=PAGE_SHIFT<<2 - ;; -(p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change - ;; -(p6) ptc.l r16,r27 // purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - RFI -END(vhpt_miss) - - .org ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(itlb_miss) - DBG_FAULT(1) - /* - * The ITLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -.itlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; - ITC_I(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(itlb_miss) - - .org ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(dtlb_miss) - DBG_FAULT(2) - /* - * The DTLB handler accesses the PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the PTE read and - * go on normally after that. - */ - MOV_FROM_IFA(r16) // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -dtlb_fault: - MOV_FROM_IHA(r17) // get virtual address of PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read *pte - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
-(p6) br.cond.spnt page_fault - ;; - ITC_D(p0, r18, r19) - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read *pte again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - RFI -END(dtlb_miss) - - .org ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(alt_itlb_miss) - DBG_FAULT(3) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_IPSR(p0, r21) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r31=pr - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // user mode - ;; - THASH(p8, r17, r16, r23) - ;; - MOV_TO_IHA(p8, r17, r23) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .itlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - shr.u r18=r16,57 // move address bit 61 to bit 4 - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) - cmp.ne p8,p0=r0,r23 // psr.cpl != 0? - or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p8) br.cond.spnt page_fault - ;; - ITC_I(p0, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_itlb_miss) - - .org ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(alt_dtlb_miss) - DBG_FAULT(4) - MOV_FROM_IFA(r16) // get address that caused the TLB miss - movl r17=PAGE_KERNEL - MOV_FROM_ISR(r20) - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - MOV_FROM_IPSR(p0, r21) - mov r31=pr - mov r24=PERCPU_ADDR - ;; -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // access to region 0-5 - ;; - THASH(p8, r17, r16, r25) - ;; - MOV_TO_IHA(p8, r17, r25) -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk dtlb_fault -#endif - cmp.ge p10,p11=r16,r24 // access to per_cpu_data? - tbit.z p12,p0=r16,61 // access to region 6? - mov r25=PERCPU_PAGE_SHIFT << 2 - mov r26=PERCPU_PAGE_SIZE - nop.m 0 - nop.b 0 - ;; -(p10) mov r19=IA64_KR(PER_CPU_DATA) -(p11) and r19=r19,r16 // clear non-ppn fields - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
- ;; -(p10) sub r19=r19,r26 - MOV_TO_ITIR(p10, r25, r24) - cmp.ne p8,p0=r0,r23 -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field -(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr -(p8) br.cond.spnt page_fault - - dep r21=-1,r21,IA64_PSR_ED_BIT,1 - ;; - or r19=r19,r17 // insert PTE control bits into r19 - MOV_TO_IPSR(p6, r21, r24) - ;; - ITC_D(p7, r19, r18) // insert the TLB entry - mov pr=r31,-1 - RFI -END(alt_dtlb_miss) - - .org ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(nested_dtlb_miss) - /* - * In the absence of kernel bugs, we get here when the virtually mapped linear - * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction - * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page - * table is missing, a nested TLB miss fault is triggered and control is - * transferred to this point. When this happens, we lookup the pte for the - * faulting address by walking the page table in physical mode and return to the - * continuation point passed in register r30 (or call page_fault if the address is - * not mapped). - * - * Input: r16: faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Output: r17: physical address of PTE of faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) - */ - RSM_PSR_DT // switch to using physical data addressing - mov r19=IA64_KR(PT_BASE) // get the page table base address - shl r21=r16,3 // shift bit 60 into sign bit - MOV_FROM_ITIR(r18) - ;; - shr.u r17=r16,61 // get the region number into r17 - extr.u r18=r18,2,6 // get the faulting page size - ;; - cmp.eq p6,p7=5,r17 // is faulting address in region 5? - add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address - add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 - ;; - shr.u r22=r16,r22 - shr.u r18=r16,r18 -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? -#if CONFIG_PGTABLE_LEVELS == 4 - shr.u r18=r22,PUD_SHIFT // shift pud index into position -#else - shr.u r18=r22,PMD_SHIFT // shift pmd index into position -#endif - ;; - ld8 r17=[r17] // get *pgd (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) - ;; -#if CONFIG_PGTABLE_LEVELS == 4 -(p7) ld8 r17=[r17] // get *pud (may be 0) - shr.u r18=r22,PMD_SHIFT // shift pmd index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) - ;; -#endif -(p7) ld8 r17=[r17] // get *pmd (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift pte index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? 
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); -(p6) br.cond.spnt page_fault - mov b0=r30 - br.sptk.many b0 // return to continuation point -END(nested_dtlb_miss) - - .org ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(ikey_miss) - DBG_FAULT(6) - FAULT(6) -END(ikey_miss) - - .org ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(dkey_miss) - DBG_FAULT(7) - FAULT(7) -END(dkey_miss) - - .org ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(dirty_bit) - DBG_FAULT(8) - /* - * What we do here is to simply turn on the dirty bit in the PTE. We need to - * update both the page-table and the TLB entry. To efficiently access the PTE, - * we address it through the virtual page table. Most likely, the TLB entry for - * the relevant virtual page table page is still present in the TLB so we can - * normally do this without additional TLB misses. In case the necessary virtual - * page table TLB entry isn't present, we take a nested TLB miss hit where we look - * up the physical address of the L3 PTE and then continue at label 1 below. - */ - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault - mov r31=pr // save pr -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present - ;; - ITC_D(p6, r25, r18) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov pr=r31,-1 // restore pr - RFI -END(dirty_bit) - - .org ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(iaccess_bit) - DBG_FAULT(9) - // Like Entry 8, except for instruction access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - mov r31=pr // save predicates -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. - */ - MOV_FROM_IPSR(p0, r17) - ;; - MOV_FROM_IIP(r18) - tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
- ;; -(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa -#endif /* CONFIG_ITANIUM */ - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the accessed bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page present - ;; - ITC_I(p6, r25, r26) // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else /* !CONFIG_SMP */ - ;; -1: ld8 r18=[r17] - ;; - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - ITC_I(p0, r18, r16) // install updated PTE -#endif /* !CONFIG_SMP */ - mov pr=r31,-1 - RFI -END(iaccess_bit) - - .org ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(daccess_bit) - DBG_FAULT(10) - // Like Entry 8, except for data access - MOV_FROM_IFA(r16) // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE - mov r31=pr - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the dirty bit - tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit - ;; -(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present - mov r24=PAGE_SHIFT<<2 - ;; -(p6) cmp.eq p6,p7=r26,r18 // Only if page is present - ;; - ITC_D(p6, r25, r26) // install updated PTE - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - ;; - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - ;; - st8 [r17]=r18 // store back updated PTE - ITC_D(p0, r18, r16) // install updated PTE -#endif - mov b0=r29 // restore b0 - mov pr=r31,-1 - RFI -END(daccess_bit) - - .org ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(break_fault) - /* - * The streamlined system call entry/exit paths only save/restore the initial part - * of pt_regs. This implies that the callers of system-calls must adhere to the - * normal procedure calling conventions. - * - * Registers to be saved & restored: - * CR registers: cr.ipsr, cr.iip, cr.ifs - * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr - * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 - * Registers to be restored only: - * r8-r11: output value from the system call. 
- * - * During system call exit, scratch registers (including r15) are modified/cleared - * to prevent leaking bits from kernel to user level. - */ - DBG_FAULT(11) - mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) - MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) - mov r31=pr // I0 (2 cyc) - - MOV_FROM_IIM(r17) // M2 (2 cyc) - mov.m r27=ar.rsc // M2 (12 cyc) - mov r18=__IA64_BREAK_SYSCALL // A - - mov.m ar.rsc=0 // M2 - mov.m r21=ar.fpsr // M2 (12 cyc) - mov r19=b6 // I0 (2 cyc) - ;; - mov.m r23=ar.bspstore // M2 (12 cyc) - mov.m r24=ar.rnat // M2 (5 cyc) - mov.i r26=ar.pfs // I0 (2 cyc) - - invala // M0|1 - nop.m 0 // M - mov r20=r1 // A save r1 - - nop.m 0 - movl r30=sys_call_table // X - - MOV_FROM_IIP(r28) // M2 (2 cyc) - cmp.eq p0,p7=r18,r17 // I0 is this a system call? -(p7) br.cond.spnt non_syscall // B no -> - // - // From this point on, we are definitely on the syscall-path - // and we can use (non-banked) scratch registers. - // -/////////////////////////////////////////////////////////////////////// - mov r1=r16 // A move task-pointer to "addl"-addressable reg - mov r2=r16 // A setup r2 for ia64_syscall_setup - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags - - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - adds r15=-1024,r15 // A subtract 1024 from syscall number - mov r3=NR_syscalls - 1 - ;; - ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag - ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags - extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr - - shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) - addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS - cmp.leu p6,p7=r15,r3 // A syscall number in range? - ;; - - lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS -(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point - tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? - - mov.m ar.bspstore=r22 // M2 switch to kernel RBS - cmp.eq p8,p9=2,r8 // A isr.ei==2? - ;; - -(p8) mov r8=0 // A clear ei to 0 -(p7) movl r30=sys_ni_syscall // X - -(p8) adds r28=16,r28 // A switch cr.iip to next bundle -(p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - ;; - mov b6=r30 // I0 setup syscall handler branch reg early -#else - nop.i 0 - ;; -#endif - - mov.m r25=ar.unat // M2 (5 cyc) - dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr - adds r15=1024,r15 // A restore original syscall number - // - // If any of the above loads miss in L1D, we'll stall here until - // the data arrives. - // -/////////////////////////////////////////////////////////////////////// - st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting -#else - mov b6=r30 // I0 setup syscall handler branch reg early -#endif - cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? - - and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit - mov r18=ar.bsp // M2 (12 cyc) -(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS - ;; -.back_from_break_fixup: -(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack - cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? 
- br.call.sptk.many b7=ia64_syscall_setup // B -1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - // mov.m r30=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A -(pKStk) br.cond.spnt .skip_accounting // B unlikely skip - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave - ;; - ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime - ld8 r21=[r17] // M cumulated utime - sub r22=r19,r18 // A stime before leave - ;; - st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp - sub r18=r30,r19 // A elapsed time in user - ;; - add r20=r20,r22 // A sum stime - add r21=r21,r18 // A sum utime - ;; - st8 [r16]=r20 // M update stime - st8 [r17]=r21 // M update utime - ;; -.skip_accounting: -#endif - mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 - nop 0 - BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 - ;; - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection - // M0 ensure interruption collection is on - movl r3=ia64_ret_from_syscall // X - ;; - mov rp=r3 // I0 set the real return addr -(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT - - SSM_PSR_I(p15, p15, r16) // M2 restore psr.i -(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) - br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic - // NOT REACHED -/////////////////////////////////////////////////////////////////////// - // On entry, we optimistically assumed that we're coming from user-space. - // For the rare cases where a system-call is done from within the kernel, - // we fix things up at this point: -.break_fixup: - add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure - mov ar.rnat=r24 // M2 restore kernel's AR.RNAT - ;; - mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE - br.cond.sptk .back_from_break_fixup -END(break_fault) - - .org ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(interrupt) - /* interrupt handler has become too big to fit this area. */ - br.sptk.many __interrupt -END(interrupt) - - .org ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved - DBG_FAULT(13) - FAULT(13) - - .org ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - DBG_FAULT(14) - FAULT(14) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - * - * ia64_syscall_setup() is a separate subroutine so that it can - * allocate stacked registers so it can safely demine any - * potential NaT values from the input registers. 
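[Editor's note] Before the register-by-register entry/exit contract spelled out below, here is the syscall-number check from the break_fault fast path above in C form: after subtracting the 1024 bias, a single unsigned compare covers both "too small" and "too large", with sys_ni_syscall as the fallback. A sketch with an illustrative three-entry table (names and table size are stand-ins):

#include <stdio.h>

#define SYSCALL_BIAS   1024
#define NR_SYSCALLS_X  3

static long sys_ni_syscall(void) { return -38; /* -ENOSYS */ }
static long sys_a(void) { return 0; }
static long sys_b(void) { return 1; }
static long sys_c(void) { return 2; }

static long (*const table[NR_SYSCALLS_X])(void) = { sys_a, sys_b, sys_c };

static long dispatch(long nr)
{
    unsigned long idx = (unsigned long)(nr - SYSCALL_BIAS);
    /* like cmp.leu above: one unsigned compare handles both bounds */
    return (idx < NR_SYSCALLS_X ? table[idx] : sys_ni_syscall)();
}

int main(void) { printf("%ld\n", dispatch(1025)); return 0; }

The entry and exit conditions for ia64_syscall_setup continue below.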
- * - * On entry: - * - executing on bank 0 or bank 1 register set (doesn't matter) - * - r1: stack pointer - * - r2: current task pointer - * - r3: preserved - * - r11: original contents (saved ar.pfs to be saved) - * - r12: original contents (sp to be saved) - * - r13: original contents (tp to be saved) - * - r15: original contents (syscall # to be saved) - * - r18: saved bsp (after switching to kernel stack) - * - r19: saved b6 - * - r20: saved r1 (gp) - * - r21: saved ar.fpsr - * - r22: kernel's register backing store base (krbs_base) - * - r23: saved ar.bspstore - * - r24: saved ar.rnat - * - r25: saved ar.unat - * - r26: saved ar.pfs - * - r27: saved ar.rsc - * - r28: saved cr.iip - * - r29: saved cr.ipsr - * - r30: ar.itc for accounting (don't touch) - * - r31: saved pr - * - b0: original contents (to be saved) - * On exit: - * - p10: TRUE if syscall is invoked with more than 8 out - * registers or r15's Nat is true - * - r1: kernel's gp - * - r3: preserved (same as on entry) - * - r8: -EINVAL if p10 is true - * - r12: points to kernel stack - * - r13: points to current task - * - r14: preserved (same as on entry) - * - p13: preserved - * - p15: TRUE if interrupts need to be re-enabled - * - ar.fpsr: set to kernel settings - * - b6: preserved (same as on entry) - */ -GLOBAL_ENTRY(ia64_syscall_setup) -#if PT(B6) != 0 -# error This code assumes that b6 is the first field in pt_regs. -#endif - st8 [r1]=r19 // save b6 - add r16=PT(CR_IPSR),r1 // initialize first base pointer - add r17=PT(R11),r1 // initialize second base pointer - ;; - alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable - st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr - tnat.nz p8,p0=in0 - - st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 - tnat.nz p9,p0=in1 -(pKStk) mov r18=r0 // make sure r18 isn't NaT - ;; - - st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs - st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip - mov r28=b0 // save b0 (2 cyc) - ;; - - st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat - dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] -(p8) mov in0=-1 - ;; - - st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs - extr.u r11=r19,7,7 // I0 // get sol of ar.pfs - and r8=0x7f,r19 // A // get sof of ar.pfs - - st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc - tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 -(p9) mov in1=-1 - ;; - -(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 - tnat.nz p10,p0=in2 - add r11=8,r11 - ;; -(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field -(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field - tnat.nz p11,p0=in3 - ;; -(p10) mov in2=-1 - tnat.nz p12,p0=in4 // [I0] -(p11) mov in3=-1 - ;; -(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat -(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore - shl r18=r18,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates - st8 [r17]=r28,PT(R1)-PT(B0) // save b0 - tnat.nz p13,p0=in5 // [I0] - ;; - st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" - st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 -(p12) mov in4=-1 - ;; - -.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 -.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 -(p13) mov in5=-1 - ;; - st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p13,p0=in6 - cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 - ;; - mov r8=1 -(p9) tnat.nz p10,p0=r15 - adds r12=-16,r1 // switch to 
kernel memory stack (with 16 bytes of scratch) - - st8.spill [r17]=r15 // save r15 - tnat.nz p8,p0=in7 - nop.i 0 - - mov r13=r2 // establish `current' - movl r1=__gp // establish kernel global pointer - ;; - st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p13) mov in6=-1 -(p8) mov in7=-1 - - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value -(p10) mov r8=-EINVAL - br.ret.sptk.many b7 -END(ia64_syscall_setup) - - .org ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - DBG_FAULT(15) - FAULT(15) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) - -#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) - /* - * There is no particular reason for this code to be here, other than - * that there happens to be space here that would go unused otherwise. - * If this fault ever gets "unreserved", simply moved the following - * code to a more suitable spot... - * - * account_sys_enter is called from SAVE_MIN* macros if accounting is - * enabled and if the macro is entered from user mode. - */ -GLOBAL_ENTRY(account_sys_enter) - // mov.m r20=ar.itc is called in advance, and r13 is current - add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 - add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 - ;; - ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel - ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel - ;; - ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime - ld8 r21=[r17] // cumulated utime - sub r22=r19,r18 // stime before leave kernel - ;; - st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp - sub r18=r20,r19 // elapsed time in user mode - ;; - add r23=r23,r22 // sum stime - add r21=r21,r18 // sum utime - ;; - st8 [r16]=r23 // update stime - st8 [r17]=r21 // update utime - ;; - br.ret.sptk.many rp -END(account_sys_enter) -#endif - - .org ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - DBG_FAULT(17) - FAULT(17) - - .org ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - DBG_FAULT(18) - FAULT(18) - - .org ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - DBG_FAULT(19) - FAULT(19) - -// -// --- End of long entries, Beginning of short entries -// - - .org ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) -ENTRY(page_not_present) - DBG_FAULT(20) - MOV_FROM_IFA(r16) - RSM_PSR_DT - /* - * The Linux page fault handler doesn't expect non-present pages to be in - * the TLB. Flush the existing entry now, so we meet that expectation. 
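[Editor's note] Stepping back to account_sys_enter above: its bookkeeping splits elapsed time at the last kernel-leave stamp, charging the earlier interval to system time and the later one to user time. A C sketch of the same arithmetic (struct and field names are illustrative mirrors of the TI_AC_STAMP/TI_AC_LEAVE/TI_AC_STIME/TI_AC_UTIME slots):

#include <stdint.h>
#include <stdio.h>

struct thread_ac { uint64_t stamp, leave, stime, utime; };

/* now: the ar.itc reading taken on kernel entry */
static void sys_enter_accounting(struct thread_ac *ti, uint64_t now)
{
    ti->stime += ti->leave - ti->stamp;  /* kernel time up to the last leave */
    ti->utime += now - ti->leave;        /* user time since leaving the kernel */
    ti->stamp  = now;                    /* restart the kernel-time stamp */
}

int main(void)
{
    struct thread_ac ti = { .stamp = 100, .leave = 160 };
    sys_enter_accounting(&ti, 300);
    printf("stime=%llu utime=%llu\n",
           (unsigned long long)ti.stime, (unsigned long long)ti.utime);
    return 0;
}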
- */ - mov r17=PAGE_SHIFT<<2 - ;; - ptc.l r16,r17 - ;; - mov r31=pr - srlz.d - br.sptk.many page_fault -END(page_not_present) - - .org ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) -ENTRY(key_permission) - DBG_FAULT(21) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(key_permission) - - .org ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(iaccess_rights) - DBG_FAULT(22) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(iaccess_rights) - - .org ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(daccess_rights) - DBG_FAULT(23) - MOV_FROM_IFA(r16) - RSM_PSR_DT - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(daccess_rights) - - .org ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(general_exception) - DBG_FAULT(24) - MOV_FROM_ISR(r16) - mov r31=pr - ;; - cmp4.eq p6,p0=0,r16 -(p6) br.sptk.many dispatch_illegal_op_fault - ;; - mov r19=24 // fault number - br.sptk.many dispatch_to_fault_handler -END(general_exception) - - .org ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(disabled_fp_reg) - DBG_FAULT(25) - rsm psr.dfh // ensure we can access fph - ;; - srlz.d - mov r31=pr - mov r19=25 - br.sptk.many dispatch_to_fault_handler -END(disabled_fp_reg) - - .org ia64_ivt+0x5600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(nat_consumption) - DBG_FAULT(26) - - MOV_FROM_IPSR(p0, r16) - MOV_FROM_ISR(r17) - mov r31=pr // save PR - ;; - and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} - tbit.z p6,p0=r17,IA64_ISR_NA_BIT - ;; - cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 - dep r16=-1,r16,IA64_PSR_ED_BIT,1 -(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) - ;; - MOV_TO_IPSR(p0, r16, r18) - mov pr=r31,-1 - ;; - RFI - -1: mov pr=r31,-1 - ;; - FAULT(26) -END(nat_consumption) - - .org ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(speculation_vector) - DBG_FAULT(27) - /* - * A [f]chk.[as] instruction needs to take the branch to the recovery code but - * this part of the architecture is not implemented in hardware on some CPUs, such - * as Itanium. Thus, in general we need to emulate the behavior. IIM contains - * the relative target (not yet sign extended). So after sign extending it we - * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, - * i.e., the slot to restart into. 
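[Editor's note] The MOV_FROM_IIM/shl/shr sequence below performs exactly this sign-extend-and-add: shifting the 21-bit immediate up to bit 63 and arithmetic-shifting back down by 39 leaves sext(imm21) * 16, i.e. a byte offset measured in 16-byte bundles. A C rendering (relies on arithmetic >> for signed values, mirroring the ia64 shr instruction; a sketch, not the handler itself):

#include <stdint.h>
#include <stdio.h>

static uint64_t speculation_target(uint64_t iip, uint64_t imm21)
{
    int64_t off = (int64_t)(imm21 << 43) >> 39;   /* sext(imm21) << 4 */
    return iip + (uint64_t)off;
}

int main(void)
{
    /* imm21 = 0x1FFFFF encodes -1 bundle: the target is iip - 16 */
    printf("%#llx\n", (unsigned long long)speculation_target(0x1000, 0x1FFFFF));
    return 0;
}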
- * - * cr.imm contains zero_ext(imm21) - */ - MOV_FROM_IIM(r18) - ;; - MOV_FROM_IIP(r17) - shl r18=r18,43 // put sign bit in position (43=64-21) - ;; - - MOV_FROM_IPSR(p0, r16) - shr r18=r18,39 // sign extend (39=43-4) - ;; - - add r17=r17,r18 // now add the offset - ;; - MOV_TO_IIP(r17, r19) - dep r16=0,r16,41,2 // clear EI - ;; - - MOV_TO_IPSR(p0, r16, r19) - ;; - - RFI -END(speculation_vector) - - .org ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - DBG_FAULT(28) - FAULT(28) - - .org ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(debug_vector) - DBG_FAULT(29) - FAULT(29) -END(debug_vector) - - .org ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(unaligned_access) - DBG_FAULT(30) - mov r31=pr // prepare to save predicates - ;; - br.sptk.many dispatch_unaligned_handler -END(unaligned_access) - - .org ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(unsupported_data_reference) - DBG_FAULT(31) - FAULT(31) -END(unsupported_data_reference) - - .org ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(floating_point_fault) - DBG_FAULT(32) - FAULT(32) -END(floating_point_fault) - - .org ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(floating_point_trap) - DBG_FAULT(33) - FAULT(33) -END(floating_point_trap) - - .org ia64_ivt+0x5e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(lower_privilege_trap) - DBG_FAULT(34) - FAULT(34) -END(lower_privilege_trap) - - .org ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(taken_branch_trap) - DBG_FAULT(35) - FAULT(35) -END(taken_branch_trap) - - .org ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(single_step_trap) - DBG_FAULT(36) - FAULT(36) -END(single_step_trap) - - .org ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Reserved - DBG_FAULT(37) - FAULT(37) - - .org ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - DBG_FAULT(38) - FAULT(38) - - .org ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - DBG_FAULT(39) - FAULT(39) - - .org ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - DBG_FAULT(40) - FAULT(40) - - .org ia64_ivt+0x6500 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - DBG_FAULT(41) - FAULT(41) - - .org ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - DBG_FAULT(42) - FAULT(42) - - .org ia64_ivt+0x6700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - DBG_FAULT(43) - FAULT(43) - - .org ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - DBG_FAULT(44) - FAULT(44) - - .org ia64_ivt+0x6900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(ia32_exception) - DBG_FAULT(45) - FAULT(45) -END(ia32_exception) - - .org ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(ia32_intercept) - DBG_FAULT(46) - FAULT(46) -END(ia32_intercept) - - .org ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(ia32_interrupt) - DBG_FAULT(47) - FAULT(47) -END(ia32_interrupt) - - .org ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - DBG_FAULT(48) - FAULT(48) - - .org ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - DBG_FAULT(49) - FAULT(49) - - .org ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - DBG_FAULT(50) - FAULT(50) - - .org ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - DBG_FAULT(51) - FAULT(51) - - .org ia64_ivt+0x7000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - DBG_FAULT(52) - FAULT(52) - - .org ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - DBG_FAULT(53) - FAULT(53) - - .org ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - DBG_FAULT(54) - FAULT(54) - - .org ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - DBG_FAULT(55) - FAULT(55) - - .org ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - DBG_FAULT(56) - FAULT(56) - - .org ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - DBG_FAULT(57) - FAULT(57) - - .org ia64_ivt+0x7600 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - DBG_FAULT(58) - FAULT(58) - - .org ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - DBG_FAULT(59) - FAULT(59) - - .org ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - DBG_FAULT(60) - FAULT(60) - - .org ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - DBG_FAULT(61) - FAULT(61) - - .org ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - DBG_FAULT(62) - FAULT(62) - - .org ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - DBG_FAULT(63) - FAULT(63) - - .org ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - DBG_FAULT(64) - FAULT(64) - - .org ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - DBG_FAULT(65) - FAULT(65) - - .org ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - DBG_FAULT(66) - FAULT(66) - - .org ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - DBG_FAULT(67) - FAULT(67) - - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - SSM_PSR_DT_AND_SRLZ_I - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 - MOV_FROM_IFA(out0) - MOV_FROM_ISR(out1) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) - adds r3=8,r2 // set up second base pointer - SSM_PSR_I(p15, p15, r14) // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... 
- - alloc r14=ar.pfs,0,0,2,0 - MOV_FROM_IIM(out0) - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r15) // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - -ENTRY(__interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) - // ensure everybody knows psr.ic is back on - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - ;; - MCA_RECOVER_RANGE(interrupt) - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq -END(__interrupt) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - MOV_FROM_IFA(out0) - adds out1=16,sp - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - MOV_FROM_ISR(out1) - MOV_FROM_IFA(out2) - MOV_FROM_IIM(out3) - MOV_FROM_ITIR(out4) - ;; - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) - // guarantee that interruption collection is on - mov out0=r15 - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - - /* - * Squatting in this space ... - * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. 
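[Editor's note] A C rendering of that callback contract (the struct and names here are hypothetical; the real mechanism is asm-only and returns the aggregate through r8-r11, with r8 as the callback):

#include <stdio.h>

struct illegal_op_return {
    long (*callback)(long, long, long);   /* NULL: nothing to run */
    long arg0, arg1, arg2;
};

static long fixup(long a, long b, long c) { return a + b + c; }

static struct illegal_op_return fault_handler(void)
{
    return (struct illegal_op_return){ fixup, 1, 2, 3 };
}

int main(void)
{
    struct illegal_op_return r = fault_handler();
    if (r.callback)                       /* mirrors cmp.ne p6,p0=0,r8 below */
        printf("callback -> %ld\n", r.callback(r.arg0, r.arg1, r.arg2));
    return 0;
}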
- */ -ENTRY(dispatch_illegal_op_fault) - .prologue - .body - SAVE_MIN_WITH_COVER - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec - ;; - SAVE_REST - PT_REGS_UNWIND_INFO(0) - ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S deleted file mode 100644 index 086cfa4999fd2ac29d452d823220513aee3d4f96..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/mca_asm.S +++ /dev/null @@ -1,1123 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_asm.S - * Purpose: assembly portion of the IA64 MCA handling - * - * Mods by cfleck to integrate into kernel build - * - * 2000-03-15 David Mosberger-Tang - * Added various stop bits to get a clean compile - * - * 2000-03-29 Chuck Fleckenstein - * Added code to save INIT handoff state in pt_regs format, - * switch to temp kstack, switch modes, jump to C INIT handler - * - * 2002-01-04 J.Hall - * Before entering virtual mode code: - * 1. Check for TLB CPU error - * 2. Restore current thread pointer to kr6 - * 3. Move stack ptr 16 bytes to conform to C calling convention - * - * 2004-11-12 Russ Anderson - * Added per cpu MCA/INIT stack save areas. - * - * 2005-12-08 Keith Owens - * Use per cpu MCA/INIT stacks for all data. - */ -#include - -#include -#include -#include -#include -#include - -#include "entry.h" - -#define GET_IA64_MCA_DATA(reg) \ - GET_THIS_PADDR(reg, ia64_mca_data) \ - ;; \ - ld8 reg=[reg] - - .global ia64_do_tlb_purge - .global ia64_os_mca_dispatch - .global ia64_os_init_on_kdump - .global ia64_os_init_dispatch_monarch - .global ia64_os_init_dispatch_slave - - .text - .align 16 - -//StartMain//////////////////////////////////////////////////////////////////// - -/* - * Just the TLB purge part is moved to a separate function - * so we can re-use the code for cpu hotplug code as well - * Caller should now setup b1, so we can branch once the - * tlb flush is complete. - */ - -ia64_do_tlb_purge: -#define O(member) IA64_CPUINFO_##member##_OFFSET - - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=0 - ;; - adds r20=-1,r20 - ;; -#undef O - -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i // srlz.i implies srlz.d - ;; - - // Now purge addresses formerly mapped by TR registers - // 1. Purge ITR&DTR for kernel. - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - srlz.d - ;; - // 3. Purge ITR for PAL code. 
- GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - // 4. Purge DTR for stack. - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - // Now branch away to caller. - br.sptk.many b1 - ;; - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -ia64_os_mca_dispatch: - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - mov r19=1 // All MCA events are treated as monarch (for now) - br.sptk ia64_state_save // save the state that is not in minstate -1: - - GET_IA64_MCA_DATA(r2) - // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param - ;; - add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 - ;; - ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. - ;; - tbit.nz p6,p7=r18,60 -(p7) br.spnt done_tlb_purge_and_reload - - // The following code purges TC and TR entries. Then reload all TC entries. - // Purge percpu data TC entries. -begin_tlb_purge_and_reload: - movl r18=ia64_reload_tr;; - LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; - mov b1=r18;; - br.sptk.many ia64_do_tlb_purge;; - -ia64_reload_tr: - // Finally reload the TR registers. - // 1. Reload DTR/ITR registers for kernel. - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - mov r19=ip - movl r18=PAGE_KERNEL - ;; - dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT - ;; - or r18=r17,r18 - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - srlz.i - srlz.d - ;; - // 3. Reload ITR for PAL code. - GET_THIS_PADDR(r2, ia64_mca_pal_pte) - ;; - ld8 r18=[r2] // load PAL PTE - ;; - GET_THIS_PADDR(r2, ia64_mca_pal_base) - ;; - ld8 r16=[r2] // load PAL vaddr - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r16 - mov r20=IA64_TR_PALCODE - ;; - itr.i itr[r20]=r18 - ;; - srlz.i - ;; - // 4. Reload DTR for stack. - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl r19=PAGE_OFFSET - ;; - add r18=r19,r16 - movl r20=PAGE_KERNEL - ;; - add r16=r20,r16 - mov r19=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r19 - mov cr.ifa=r18 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r16 - GET_THIS_PADDR(r2, ia64_mca_tr_reload) - mov r18 = 1 - ;; - srlz.d - ;; - st8 [r2] =r18 - ;; - -done_tlb_purge_and_reload: - - // switch to per cpu MCA stack - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_mca_virtual_begin be a global function. 
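[Editor's note] The virtual-mode handler below converts the MCA data pointer with DATA_PA_TO_VA. The trick is that the kernel's identity mapping lives in region 7, so PA<->VA conversion is just setting or clearing the top three address bits. A sketch with an assumed region-7 base value:

#include <stdint.h>
#include <stdio.h>

#define PAGE_OFFSET_X 0xe000000000000000ULL   /* assumed region-7 base */

static uint64_t data_pa_to_va(uint64_t pa) { return pa |  PAGE_OFFSET_X; }
static uint64_t data_va_to_pa(uint64_t va) { return va & ~PAGE_OFFSET_X; }

int main(void)
{
    uint64_t pa = 0x4000000ULL;
    printf("va=%#llx back-to-pa=%#llx\n",
           (unsigned long long)data_pa_to_va(pa),
           (unsigned long long)data_va_to_pa(data_pa_to_va(pa)));
    return 0;
}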
-ENTRY(ia64_os_mca_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_mca_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) -ia64_os_mca_virtual_end: - -END(ia64_os_mca_virtual_begin) - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - mov b0=r12 // SAL_CHECK return address - - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -//StartMain//////////////////////////////////////////////////////////////////// - -// -// NOP init handler for kdump. In panic situation, we may receive INIT -// while kernel transition. Since we initialize registers on leave from -// current kernel, no longer monarch/slave handlers of current kernel in -// virtual mode are called safely. -// We can unregister these init handlers from SAL, however then the INIT -// will result in warmboot by SAL and we cannot retrieve the crashdump. -// Therefore register this NOP function to SAL, to prevent entering virtual -// mode and resulting warmboot by SAL. -// -ia64_os_init_on_kdump: - mov r8=r0 // IA64_INIT_RESUME - mov r9=r10 // SAL_GP - mov r22=r17 // *minstate - ;; - mov r10=r0 // return to same context - mov b0=r12 // SAL_CHECK return address - br b0 - -// -// SAL to OS entry point for INIT on all processors. This has been defined for -// registration purposes with SAL as a part of ia64_mca_init. Monarch and -// slave INIT have identical processing, except for the value of the -// sos->monarch flag in r19. -// - -ia64_os_init_dispatch_monarch: - mov r19=1 // Bow, bow, ye lower middle classes! - br.sptk ia64_os_init_dispatch - -ia64_os_init_dispatch_slave: - mov r19=0 // yeth, mathter - -ia64_os_init_dispatch: - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_save // save the state that is not in minstate -1: - - // switch to per cpu INIT stack - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_new_stack -1: - - // everything saved, now we can set the kernel registers - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_set_kernel_registers -1: - - // This must be done in physical mode - GET_IA64_MCA_DATA(r2) - ;; - mov r7=r2 - - // Enter virtual mode from physical mode - VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4) - - // This code returns to SAL via SOS r2, in general SAL has no unwind - // data. To get a clean termination when backtracing the C MCA/INIT - // handler, set a dummy return address of 0 in this routine. That - // requires that ia64_os_init_virtual_begin be a global function. 
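[Editor's note] As the dispatch comment above notes, monarch and slave INIT share one path and differ only in the sos->monarch flag carried in r19. A minimal C analogue of that split (struct and names are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct sal_os_state { bool monarch; };

static void init_handler(struct sal_os_state *sos)
{
    printf("%s INIT handler\n", sos->monarch ? "monarch" : "slave");
}

static void init_dispatch(bool monarch)   /* common tail of both entry points */
{
    struct sal_os_state sos = { .monarch = monarch };
    init_handler(&sos);
}

int main(void) { init_dispatch(true); init_dispatch(false); return 0; }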
-ENTRY(ia64_os_init_virtual_begin) - .prologue - .save rp,r0 - .body - - mov ar.rsc=3 // set eager mode for C handler - mov r2=r7 // see GET_IA64_MCA_DATA above - ;; - - // Call virtual mode handler - alloc r14=ar.pfs,0,0,3,0 - ;; - DATA_PA_TO_VA(r2,r7) - ;; - add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 - add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 - add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2 - br.call.sptk.many b0=ia64_init_handler - - // Revert back to physical mode before going back to SAL - PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4) -ia64_os_init_virtual_end: - -END(ia64_os_init_virtual_begin) - - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_state_restore // restore the SAL state -1: - - // switch back to previous stack - alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame - mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack - LOAD_PHYSICAL(p0,r2,1f) // return address - br.sptk ia64_old_stack -1: - - mov b0=r12 // SAL_CHECK return address - br b0 - -//EndMain////////////////////////////////////////////////////////////////////// - -// common defines for the stubs -#define ms r4 -#define regs r5 -#define temp1 r2 /* careful, it overlaps with input registers */ -#define temp2 r3 /* careful, it overlaps with input registers */ -#define temp3 r7 -#define temp4 r14 - - -//++ -// Name: -// ia64_state_save() -// -// Stub Description: -// -// Save the state that is not in minstate. This is sensitive to the layout of -// struct ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// The OS to SAL section of struct ia64_sal_os_state is set to a default -// value of cold boot (MCA) or warm boot (INIT) and return to the same -// context. ia64_sal_os_state is also used to hold some registers that -// need to be saved and restored across the stack switches. -// -// Most input registers to this stub come from PAL/SAL -// r1 os gp, physical -// r8 pal_proc entry point -// r9 sal_proc entry point -// r10 sal gp -// r11 MCA - rendevzous state, INIT - reason code -// r12 sal return address -// r17 pal min_state -// r18 processor state parameter -// r19 monarch flag, set by the caller of this routine -// -// In addition to the SAL to OS state, this routine saves all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those that are already in the PAL minstate area. This -// results in a partial pt_regs and switch_stack, the C code copies the -// remaining registers from PAL minstate to pt_regs and switch_stack. The -// resulting structures contain all the state of the original process when -// MCA/INIT occurred. 
-// -//-- - -ia64_state_save: - add regs=MCA_SOS_OFFSET, r3 - add ms=MCA_SOS_OFFSET+8, r3 - mov b0=r2 // save return address - cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3 - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack - add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack - ;; - mov regs=temp1 // save the start of sos - st8 [temp1]=r1,16 // os_gp - st8 [temp2]=r8,16 // pal_proc - ;; - st8 [temp1]=r9,16 // sal_proc - st8 [temp2]=r11,16 // rv_rc - mov r11=cr.iipa - ;; - st8 [temp1]=r18 // proc_state_param - st8 [temp2]=r19 // monarch - mov r6=IA64_KR(CURRENT) - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - st8 [temp1]=r12,16 // sal_ra - st8 [temp2]=r10,16 // sal_gp - mov r12=cr.isr - ;; - st8 [temp1]=r17,16 // pal_min_state - st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT - mov r6=IA64_KR(CURRENT_STACK) - ;; - st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK - st8 [temp2]=r0,16 // prev_task, starts off as NULL - mov r6=cr.ifa - ;; - st8 [temp1]=r12,16 // cr.isr - st8 [temp2]=r6,16 // cr.ifa - mov r12=cr.itir - ;; - st8 [temp1]=r12,16 // cr.itir - st8 [temp2]=r11,16 // cr.iipa - mov r12=cr.iim - ;; - st8 [temp1]=r12 // cr.iim -(p1) mov r12=IA64_MCA_COLD_BOOT -(p2) mov r12=IA64_INIT_WARM_BOOT - mov r6=cr.iha - add temp1=SOS(OS_STATUS), regs - ;; - st8 [temp2]=r6 // cr.iha - add temp2=SOS(CONTEXT), regs - st8 [temp1]=r12 // os_status, default is cold boot - mov r6=IA64_MCA_SAME_CONTEXT - ;; - st8 [temp2]=r6 // context, default is same context - - // Save the pt_regs data that is not in minstate. The previous code - // left regs at sos. - add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs - ;; - add temp1=PT(B6), regs - mov temp3=b6 - mov temp4=b7 - add temp2=PT(B7), regs - ;; - st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6 - st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7 - mov temp3=ar.csd - mov temp4=ar.ssd - cover // must be last in group - ;; - st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd - st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd - mov temp3=ar.unat - mov temp4=ar.pfs - ;; - st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat - st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs - mov temp3=ar.rnat - mov temp4=ar.bspstore - ;; - st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat - st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore - mov temp3=ar.bsp - ;; - sub temp3=temp3, temp4 // ar.bsp - ar.bspstore - mov temp4=ar.fpsr - ;; - shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs - st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr - mov temp3=ar.ccv - ;; - st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv - stf.spill [temp2]=f6,PT(F8)-PT(F6) - ;; - stf.spill [temp1]=f7,PT(F9)-PT(F7) - stf.spill [temp2]=f8,PT(F10)-PT(F8) - ;; - stf.spill [temp1]=f9,PT(F11)-PT(F9) - stf.spill [temp2]=f10 - ;; - stf.spill [temp1]=f11 - - // Save the switch_stack data that is not in minstate nor pt_regs. The - // previous code left regs at pt_regs. 
- add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - stf.spill [temp1]=f2,32 - stf.spill [temp2]=f3,32 - ;; - stf.spill [temp1]=f4,32 - stf.spill [temp2]=f5,32 - ;; - stf.spill [temp1]=f12,32 - stf.spill [temp2]=f13,32 - ;; - stf.spill [temp1]=f14,32 - stf.spill [temp2]=f15,32 - ;; - stf.spill [temp1]=f16,32 - stf.spill [temp2]=f17,32 - ;; - stf.spill [temp1]=f18,32 - stf.spill [temp2]=f19,32 - ;; - stf.spill [temp1]=f20,32 - stf.spill [temp2]=f21,32 - ;; - stf.spill [temp1]=f22,32 - stf.spill [temp2]=f23,32 - ;; - stf.spill [temp1]=f24,32 - stf.spill [temp2]=f25,32 - ;; - stf.spill [temp1]=f26,32 - stf.spill [temp2]=f27,32 - ;; - stf.spill [temp1]=f28,32 - stf.spill [temp2]=f29,32 - ;; - stf.spill [temp1]=f30,SW(B2)-SW(F30) - stf.spill [temp2]=f31,SW(B3)-SW(F31) - mov temp3=b2 - mov temp4=b3 - ;; - st8 [temp1]=temp3,16 // save b2 - st8 [temp2]=temp4,16 // save b3 - mov temp3=b4 - mov temp4=b5 - ;; - st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4 - st8 [temp2]=temp4 // save b5 - mov temp3=ar.lc - ;; - st8 [temp1]=temp3 // save ar.lc - - // FIXME: Some proms are incorrectly accessing the minstate area as - // cached data. The C code uses region 6, uncached virtual. Ensure - // that there is no cache data lying around for the first 1K of the - // minstate area. - // Remove this code in September 2006, that gives platforms a year to - // fix their proms and get their customers updated. - - add r1=32*1,r17 - add r2=32*2,r17 - add r3=32*3,r17 - add r4=32*4,r17 - add r5=32*5,r17 - add r6=32*6,r17 - add r7=32*7,r17 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - add r17=32*8,r17 - add r1=32*8,r1 - add r2=32*8,r2 - add r3=32*8,r3 - add r4=32*8,r4 - add r5=32*8,r5 - add r6=32*8,r6 - add r7=32*8,r7 - ;; - fc r17 - fc r1 - fc r2 - fc r3 - fc r4 - fc r5 - fc r6 - fc r7 - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_state_restore() -// -// Stub Description: -// -// Restore the SAL/OS state. This is sensitive to the layout of struct -// ia64_sal_os_state in mca.h. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// In addition to the SAL to OS state, this routine restores all the -// registers that appear in struct pt_regs and struct switch_stack, -// excluding those in the PAL minstate area. -// -//-- - -ia64_state_restore: - // Restore the switch_stack data that is not in minstate nor pt_regs. 
- add regs=MCA_SWITCH_STACK_OFFSET, r3 - mov b0=r2 // save return address - ;; - GET_IA64_MCA_DATA(temp2) - ;; - add regs=temp2, regs - ;; - add temp1=SW(F2), regs - add temp2=SW(F3), regs - ;; - ldf.fill f2=[temp1],32 - ldf.fill f3=[temp2],32 - ;; - ldf.fill f4=[temp1],32 - ldf.fill f5=[temp2],32 - ;; - ldf.fill f12=[temp1],32 - ldf.fill f13=[temp2],32 - ;; - ldf.fill f14=[temp1],32 - ldf.fill f15=[temp2],32 - ;; - ldf.fill f16=[temp1],32 - ldf.fill f17=[temp2],32 - ;; - ldf.fill f18=[temp1],32 - ldf.fill f19=[temp2],32 - ;; - ldf.fill f20=[temp1],32 - ldf.fill f21=[temp2],32 - ;; - ldf.fill f22=[temp1],32 - ldf.fill f23=[temp2],32 - ;; - ldf.fill f24=[temp1],32 - ldf.fill f25=[temp2],32 - ;; - ldf.fill f26=[temp1],32 - ldf.fill f27=[temp2],32 - ;; - ldf.fill f28=[temp1],32 - ldf.fill f29=[temp2],32 - ;; - ldf.fill f30=[temp1],SW(B2)-SW(F30) - ldf.fill f31=[temp2],SW(B3)-SW(F31) - ;; - ld8 temp3=[temp1],16 // restore b2 - ld8 temp4=[temp2],16 // restore b3 - ;; - mov b2=temp3 - mov b3=temp4 - ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4 - ld8 temp4=[temp2] // restore b5 - ;; - mov b4=temp3 - mov b5=temp4 - ld8 temp3=[temp1] // restore ar.lc - ;; - mov ar.lc=temp3 - - // Restore the pt_regs data that is not in minstate. The previous code - // left regs at switch_stack. - add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs - ;; - add temp1=PT(B6), regs - add temp2=PT(B7), regs - ;; - ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6 - ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7 - ;; - mov b6=temp3 - mov b7=temp4 - ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd - ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd - ;; - mov ar.csd=temp3 - mov ar.ssd=temp4 - ld8 temp3=[temp1] // restore ar.unat - add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1 - ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs - ;; - mov ar.unat=temp3 - mov ar.pfs=temp4 - // ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack. - ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv - ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr - ;; - mov ar.ccv=temp3 - mov ar.fpsr=temp4 - ldf.fill f6=[temp1],PT(F8)-PT(F6) - ldf.fill f7=[temp2],PT(F9)-PT(F7) - ;; - ldf.fill f8=[temp1],PT(F10)-PT(F8) - ldf.fill f9=[temp2],PT(F11)-PT(F9) - ;; - ldf.fill f10=[temp1] - ldf.fill f11=[temp2] - - // Restore the SAL to OS state. The previous code left regs at pt_regs. - add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs - ;; - add temp1=SOS(SAL_RA), regs - add temp2=SOS(SAL_GP), regs - ;; - ld8 r12=[temp1],16 // sal_ra - ld8 r9=[temp2],16 // sal_gp - ;; - ld8 r22=[temp1],16 // pal_min_state, virtual - ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT - ;; - ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK - ld8 r20=[temp2],16 // prev_task - ;; - ld8 temp3=[temp1],16 // cr.isr - ld8 temp4=[temp2],16 // cr.ifa - ;; - mov cr.isr=temp3 - mov cr.ifa=temp4 - ld8 temp3=[temp1],16 // cr.itir - ld8 temp4=[temp2],16 // cr.iipa - ;; - mov cr.itir=temp3 - mov cr.iipa=temp4 - ld8 temp3=[temp1] // cr.iim - ld8 temp4=[temp2] // cr.iha - add temp1=SOS(OS_STATUS), regs - add temp2=SOS(CONTEXT), regs - ;; - mov cr.iim=temp3 - mov cr.iha=temp4 - dep r22=0,r22,62,1 // pal_min_state, physical, uncached - mov IA64_KR(CURRENT)=r13 - ld8 r8=[temp1] // os_status - ld8 r10=[temp2] // context - - /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To - * avoid any dependencies on the algorithm in ia64_switch_to(), just - * purge any existing CURRENT_STACK mapping and insert the new one. 
- * - * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains - * prev_IA64_KR_CURRENT, these values may have been changed by the C - * code. Do not use r8, r9, r10, r22, they contain values ready for - * the return to SAL. - */ - - mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r15=r15,IA64_GRANULE_SHIFT - ;; - dep r15=-1,r15,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r15,r18 - ;; - srlz.d - - extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT - shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? - or r21=r20,r21 // construct PA | page properties -(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d -1: - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_new_stack() -// -// Stub Description: -// -// Switch to the MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry RBS is still on the original stack, this routine switches RBS -// to use the MCA/INIT stack. -// -// On entry, sos->pal_min_state is physical, on exit it is virtual. -// -//-- - -ia64_new_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - invala - ;; - add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack - add regs=regs, temp1 // struct pt_regs on MCA or INIT stack - ;; - // Address of minstate area provided by PAL is physical, uncacheable. - // Convert to Linux virtual address in region 6 for C code. - ld8 ms=[temp2] // pal_min_state, physical - ;; - dep temp1=-1,ms,62,2 // set region 6 - mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET - ;; - st8 [temp2]=temp1 // pal_min_state, virtual - - add temp4=temp3, regs // start of bspstore on new stack - ;; - mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack - ;; - flushrs // must be first in group - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_old_stack() -// -// Stub Description: -// -// Switch to the old stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -// On entry, pal_min_state is virtual, on exit it is physical. -// -// On entry RBS is on the MCA/INIT stack, this routine switches RBS -// back to the previous stack. -// -// The psr is set to all zeroes. SAL return requires either all zeroes or -// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this -// code does not perform correctly. -// -// The dirty registers at the time of the event were flushed to the -// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers -// before reverting to the previous bspstore. 
-//-- - -ia64_old_stack: - add regs=MCA_PT_REGS_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp2) - LOAD_PHYSICAL(p0,temp1,1f) - ;; - mov cr.ipsr=r0 - mov cr.ifs=r0 - mov cr.iip=temp1 - ;; - invala - rfi -1: - - add regs=regs, temp2 // struct pt_regs on MCA or INIT stack - ;; - add temp1=PT(LOADRS), regs - ;; - ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs - ;; - ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore - mov ar.rsc=temp2 - ;; - loadrs - ld8 temp4=[temp1] // restore ar.rnat - ;; - mov ar.bspstore=temp3 // back to old stack - ;; - mov ar.rnat=temp4 - ;; - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - - -//++ -// Name: -// ia64_set_kernel_registers() -// -// Stub Description: -// -// Set the registers that are required by the C code in order to run on an -// MCA/INIT stack. -// -// r2 contains the return address, r3 contains either -// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. -// -//-- - -ia64_set_kernel_registers: - add temp3=MCA_SP_OFFSET, r3 - mov b0=r2 // save return address - GET_IA64_MCA_DATA(temp1) - ;; - add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack - add r13=temp1, r3 // set current to start of MCA/INIT stack - add r20=temp1, r3 // physical start of MCA/INIT stack - ;; - DATA_PA_TO_VA(r12,temp2) - DATA_PA_TO_VA(r13,temp3) - ;; - mov IA64_KR(CURRENT)=r13 - - /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid - * any dependencies on the algorithm in ia64_switch_to(), just purge - * any existing CURRENT_STACK mapping and insert the new one. - */ - - mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK - ;; - shl r16=r16,IA64_GRANULE_SHIFT - ;; - dep r16=-1,r16,61,3 // virtual granule - mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps - ;; - ptr.d r16,r18 - ;; - srlz.d - - shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack - movl r21=PAGE_KERNEL // page properties - ;; - mov IA64_KR(CURRENT_STACK)=r16 - or r21=r20,r21 // construct PA | page properties - ;; - mov cr.itir=r18 - mov cr.ifa=r13 - mov r20=IA64_TR_CURRENT_STACK - - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value - ;; - itr.d dtr[r20]=r21 - ;; - srlz.d - - br.sptk b0 - -//EndStub////////////////////////////////////////////////////////////////////// - -#undef ms -#undef regs -#undef temp1 -#undef temp2 -#undef temp3 -#undef temp4 - - -// Support function for mca.c, it is here to avoid using inline asm. Given the -// address of an rnat slot, if that address is below the current ar.bspstore -// then return the contents of that slot, otherwise return the contents of -// ar.rnat. -GLOBAL_ENTRY(ia64_get_rnat) - alloc r14=ar.pfs,1,0,0,0 - mov ar.rsc=0 - ;; - mov r14=ar.bspstore - ;; - cmp.lt p6,p7=in0,r14 - ;; -(p6) ld8 r8=[in0] -(p7) mov r8=ar.rnat - mov ar.rsc=3 - br.ret.sptk.many rp -END(ia64_get_rnat) - - -// void ia64_set_psr_mc(void) -// -// Set psr.mc bit to mask MCA/INIT. 
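One aside before the psr.mc routine below: the contract of ia64_get_rnat above is simple enough to restate in C. The two read_ar_*() helpers here are hypothetical stand-ins for RSE register reads that C cannot express, which is exactly why the real routine is assembly:

	extern unsigned long read_ar_bspstore(void);	/* hypothetical helper */
	extern unsigned long read_ar_rnat(void);	/* hypothetical helper */

	unsigned long get_rnat(unsigned long *rnat_slot)
	{
		/* Slots below ar.bspstore have already been spilled to
		 * memory; anything at or above it is still in ar.rnat. */
		if ((unsigned long)rnat_slot < read_ar_bspstore())
			return *rnat_slot;
		return read_ar_rnat();
	}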
-GLOBAL_ENTRY(ia64_set_psr_mc) - rsm psr.i | psr.ic // disable interrupts - ;; - srlz.d - ;; - mov r14 = psr // get psr{36:35,31:0} - movl r15 = 1f - ;; - dep r14 = -1, r14, PSR_MC, 1 // set psr.mc - ;; - dep r14 = -1, r14, PSR_IC, 1 // set psr.ic - ;; - dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use - ;; - mov cr.ipsr = r14 - mov cr.ifs = r0 - mov cr.iip = r15 - ;; - rfi -1: - br.ret.sptk.many rp -END(ia64_set_psr_mc) diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S deleted file mode 100644 index 4428f57bee73565b34b3e043ad5391e31cd783e6..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/mca_drv_asm.S +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * File: mca_drv_asm.S - * Purpose: Assembly portion of Generic MCA handling - * - * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) 2004 Hidetoshi Seto - */ -#include - -#include -#include -#include - -GLOBAL_ENTRY(mca_handler_bhhook) - invala // clear RSE ? - cover - ;; - clrrrb - ;; - alloc r16=ar.pfs,0,2,3,0 // make a new frame - mov ar.rsc=0 - mov r13=IA64_KR(CURRENT) // current task pointer - ;; - mov r2=r13 - ;; - addl r22=IA64_RBS_OFFSET,r2 - ;; - mov ar.bspstore=r22 - addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 - ;; - adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 - ;; - st1 [r2]=r0 // clear current->thread.on_ustack flag - mov loc0=r16 - movl loc1=mca_handler_bh // recovery C function - ;; - mov out0=r8 // poisoned address - mov out1=r9 // iip - mov out2=r10 // psr - mov b6=loc1 - ;; - mov loc1=rp - ssm psr.ic - ;; - srlz.i - ;; - ssm psr.i - br.call.sptk.many rp=b6 // does not return ... - ;; - mov ar.pfs=loc0 - mov rp=loc1 - ;; - mov r8=r0 - br.ret.sptk.many rp -END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S deleted file mode 100644 index d3e22c018b68acd9a8d02e41214e25835ec1c0e8..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/pal.S +++ /dev/null @@ -1,306 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PAL Firmware support - * IA-64 Processor Programmers Reference Vol 2 - * - * Copyright (C) 1999 Don Dugger - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co - * David Mosberger - * Stephane Eranian - * - * 05/22/2000 eranian Added support for stacked register calls - * 05/24/2000 eranian Added support for physical mode static calls - */ - -#include -#include -#include - - .data -pal_entry_point: - data8 ia64_pal_default_handler - .text - -/* - * Set the PAL entry point address. This could be written in C code, but we - * do it here to keep it all in one module (besides, it's so trivial that it's - * not a big deal). - * - * in0 Address of the PAL entry point (text address, NOT a function - * descriptor). - */ -GLOBAL_ENTRY(ia64_pal_handler_init) - alloc r3=ar.pfs,1,0,0,0 - movl r2=pal_entry_point - ;; - st8 [r2]=in0 - br.ret.sptk.many rp -END(ia64_pal_handler_init) - -/* - * Default PAL call handler. This needs to be coded in assembly because it - * uses the static calling convention, i.e., the RSE may not be used and - * calls are done via "br.cond" (not "br.call"). - */ -GLOBAL_ENTRY(ia64_pal_default_handler) - mov r8=-1 - br.cond.sptk.many rp -END(ia64_pal_default_handler) - -/* - * Make a PAL call using the static calling convention. 
- *
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,5,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0
- mov r29 = in1
- mov r8 = ip
- }
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- adds r8 = 1f-1b,r8
- mov loc4=ar.rsc // save RSE configuration
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov loc3 = psr
- mov loc0 = rp
- .body
- mov r30 = in2
-
- mov r31 = in3
- mov b7 = loc2
-
- rsm psr.i
- ;;
- mov rp = r8
- br.cond.sptk.many b7
-1: mov psr.l = loc3
- mov ar.rsc = loc4 // restore RSE configuration
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_static)
-EXPORT_SYMBOL(ia64_pal_call_static)
-
-/*
- * Make a PAL call using the stacked registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,4,4,0
- movl loc2 = pal_entry_point
-
- mov r28 = in0 // Index MUST be copied to r28
- mov out0 = in0 // AND in0 of PAL function
- mov loc0 = rp
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov out1 = in1
- mov out2 = in2
- mov out3 = in3
- mov loc3 = psr
- ;;
- rsm psr.i
- mov b7 = loc2
- ;;
- br.call.sptk.many rp=b7 // now make the call
-.ret0: mov psr.l = loc3
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_stacked)
-EXPORT_SYMBOL(ia64_pal_call_stacked)
-
-/*
- * Make a physical mode PAL call using the static registers calling convention.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- *
- * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
- * So we don't need to clear them.
- */
-#define PAL_PSR_BITS_TO_CLEAR \
- (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\
- IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
- IA64_PSR_DFL | IA64_PSR_DFH)
-
-#define PAL_PSR_BITS_TO_SET \
- (IA64_PSR_BN)
-
-
-GLOBAL_ENTRY(ia64_pal_call_phys_static)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
- alloc loc1 = ar.pfs,4,7,0,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov r8 = ip // save ip to compute branch
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov r29 = in1 // first argument
- mov r30 = in2 // copy arg2
- mov r31 = in3 // copy arg3
- ;;
- mov loc3 = psr // save psr
- adds r8 = 1f-1b,r8 // calculate return address for call
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- tpa r8=r8 // convert rp to physical
- ;;
- mov b7 = loc2 // install target to branch reg
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
- mov rp = r8 // install return address (physical)
- mov loc5 = r19
- mov loc6 = r20
- br.cond.sptk.many b7
-1:
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
- mov psr.l = loc3 // restore init PSR
-
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_static)
-EXPORT_SYMBOL(ia64_pal_call_phys_static)
-
-/*
- * Make a PAL call using the stacked registers in physical mode.
- *
- * Inputs:
- * in0 Index of PAL service
- * in1 - in3 Remaining PAL arguments
- */
-GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
- alloc loc1 = ar.pfs,5,7,4,0
- movl loc2 = pal_entry_point
-1: {
- mov r28 = in0 // copy procedure index
- mov loc0 = rp // save rp
- }
- .body
- ;;
- ld8 loc2 = [loc2] // loc2 <- entry point
- mov loc3 = psr // save psr
- ;;
- mov loc4=ar.rsc // save RSE configuration
- dep.z loc2=loc2,0,61 // convert pal entry point to physical
- ;;
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- movl r16=PAL_PSR_BITS_TO_CLEAR
- movl r17=PAL_PSR_BITS_TO_SET
- ;;
- or loc3=loc3,r17 // add in psr the bits to set
- mov b7 = loc2 // install target to branch reg
- ;;
- andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode_phys
-
- mov out0 = in0 // first argument
- mov out1 = in1 // copy arg2
- mov out2 = in2 // copy arg3
- mov out3 = in3 // copy arg4
- mov loc5 = r19
- mov loc6 = r20
-
- br.call.sptk.many rp=b7 // now make the call
-
- mov ar.rsc=0 // put RSE in enforced lazy, LE mode
- mov r16=loc3 // r16= original psr
- mov r19=loc5
- mov r20=loc6
- br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
-
- mov psr.l = loc3 // restore init PSR
- mov ar.pfs = loc1
- mov rp = loc0
- ;;
- mov ar.rsc=loc4 // restore RSE configuration
- srlz.d // serialize restoration of psr.l
- br.ret.sptk.many b0
-END(ia64_pal_call_phys_stacked)
-EXPORT_SYMBOL(ia64_pal_call_phys_stacked)
-
-/*
- * Save the scratch fp regs which aren't saved in pt_regs already
- * (fp10-fp15).
- * - * NOTE: We need to do this since firmware (SAL and PAL) may use any of the - * scratch regs fp-low partition. - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_save_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - stf.spill [in0] = f10,32 - stf.spill [r2] = f11,32 - ;; - stf.spill [in0] = f12,32 - stf.spill [r2] = f13,32 - ;; - stf.spill [in0] = f14,32 - stf.spill [r2] = f15,32 - br.ret.sptk.many rp -END(ia64_save_scratch_fpregs) -EXPORT_SYMBOL(ia64_save_scratch_fpregs) - -/* - * Load scratch fp scratch regs (fp10-fp15) - * - * Inputs: - * in0 Address of stack storage for fp regs - */ -GLOBAL_ENTRY(ia64_load_scratch_fpregs) - alloc r3=ar.pfs,1,0,0,0 - add r2=16,in0 - ;; - ldf.fill f10 = [in0],32 - ldf.fill f11 = [r2],32 - ;; - ldf.fill f12 = [in0],32 - ldf.fill f13 = [r2],32 - ;; - ldf.fill f14 = [in0],32 - ldf.fill f15 = [r2],32 - br.ret.sptk.many rp -END(ia64_load_scratch_fpregs) -EXPORT_SYMBOL(ia64_load_scratch_fpregs) diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S deleted file mode 100644 index 7124fe7bec7c322ef879829342280dbdf6ff442c..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/relocate_kernel.S +++ /dev/null @@ -1,323 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/ia64/kernel/relocate_kernel.S - * - * Relocate kexec'able kernel and start it - * - * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. - * Copyright (C) 2005 Khalid Aziz - * Copyright (C) 2005 Intel Corp, Zou Nan hai - */ -#include -#include -#include -#include -#include - - /* Must be relocatable PIC code callable as a C function - */ -GLOBAL_ENTRY(relocate_new_kernel) - .prologue - alloc r31=ar.pfs,4,0,0,0 - .body -.reloc_entry: -{ - rsm psr.i| psr.ic - mov r2=ip -} - ;; -{ - flushrs // must be first insn in group - srlz.i -} - ;; - dep r2=0,r2,61,3 //to physical address - ;; - //first switch to physical mode - add r3=1f-.reloc_entry, r2 - movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC - mov ar.rsc=0 // put RSE in enforced lazy mode - ;; - add sp=(memory_stack_end - 16 - .reloc_entry),r2 - add r8=(register_stack - .reloc_entry),r2 - ;; - mov r18=ar.rnat - mov ar.bspstore=r8 - ;; - mov cr.ipsr=r16 - mov cr.iip=r3 - mov cr.ifs=r0 - srlz.i - ;; - mov ar.rnat=r18 - rfi // note: this unmask MCA/INIT (psr.mc) - ;; -1: - //physical mode code begin - mov b6=in1 - dep r28=0,in2,61,3 //to physical address - - // purge all TC entries -#define O(member) IA64_CPUINFO_##member##_OFFSET - GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 - ;; - addl r17=O(PTCE_STRIDE),r2 - addl r2=O(PTCE_BASE),r2 - ;; - ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base - ld4 r19=[r2],4 // r19=ptce_count[0] - ld4 r21=[r17],4 // r21=ptce_stride[0] - ;; - ld4 r20=[r2] // r20=ptce_count[1] - ld4 r22=[r17] // r22=ptce_stride[1] - mov r24=r0 - ;; - adds r20=-1,r20 - ;; -#undef O -2: - cmp.ltu p6,p7=r24,r19 -(p7) br.cond.dpnt.few 4f - mov ar.lc=r20 -3: - ptc.e r18 - ;; - add r18=r22,r18 - br.cloop.sptk.few 3b - ;; - add r18=r21,r18 - add r24=1,r24 - ;; - br.sptk.few 2b -4: - srlz.i - ;; - // purge TR entry for kernel text and data - movl r16=KERNEL_START - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - ;; - ptr.i r16, r18 - ptr.d r16, r18 - ;; - srlz.i - ;; - - // purge TR entry for pal code - mov r16=in3 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.i r16,r18 - ;; - srlz.i - ;; - - // purge TR entry for stack - mov r16=IA64_KR(CURRENT_STACK) - ;; - shl r16=r16,IA64_GRANULE_SHIFT - movl 
r19=PAGE_OFFSET - ;; - add r16=r19,r16 - mov r18=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.i - ;; - - //copy segments - movl r16=PAGE_MASK - mov r30=in0 // in0 is page_list - br.sptk.few .dest_page - ;; -.loop: - ld8 r30=[in0], 8;; -.dest_page: - tbit.z p0, p6=r30, 0;; // 0x1 dest page -(p6) and r17=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 1;; // 0x2 indirect page -(p6) and in0=r30, r16 -(p6) br.cond.sptk.few .loop;; - - tbit.z p0, p6=r30, 2;; // 0x4 end flag -(p6) br.cond.sptk.few .end_loop;; - - tbit.z p6, p0=r30, 3;; // 0x8 source page -(p6) br.cond.sptk.few .loop - - and r18=r30, r16 - - // simple copy page, may optimize later - movl r14=PAGE_SIZE/8 - 1;; - mov ar.lc=r14;; -1: - ld8 r14=[r18], 8;; - st8 [r17]=r14;; - fc.i r17 - add r17=8, r17 - br.ctop.sptk.few 1b - br.sptk.few .loop - ;; - -.end_loop: - sync.i // for fc.i - ;; - srlz.i - ;; - srlz.d - ;; - br.call.sptk.many b0=b6;; - -.align 32 -memory_stack: - .fill 8192, 1, 0 -memory_stack_end: -register_stack: - .fill 8192, 1, 0 -register_stack_end: -relocate_new_kernel_end: -END(relocate_new_kernel) - -.global relocate_new_kernel_size -relocate_new_kernel_size: - data8 relocate_new_kernel_end - relocate_new_kernel - -GLOBAL_ENTRY(ia64_dump_cpu_regs) - .prologue - alloc loc0=ar.pfs,1,2,0,0 - .body - mov ar.rsc=0 // put RSE in enforced lazy mode - add loc1=4*8, in0 // save r4 and r5 first - ;; -{ - flushrs // flush dirty regs to backing store - srlz.i -} - st8 [loc1]=r4, 8 - ;; - st8 [loc1]=r5, 8 - ;; - add loc1=32*8, in0 - mov r4=ar.rnat - ;; - st8 [in0]=r0, 8 // r0 - st8 [loc1]=r4, 8 // rnat - mov r5=pr - ;; - st8 [in0]=r1, 8 // r1 - st8 [loc1]=r5, 8 // pr - mov r4=b0 - ;; - st8 [in0]=r2, 8 // r2 - st8 [loc1]=r4, 8 // b0 - mov r5=b1; - ;; - st8 [in0]=r3, 24 // r3 - st8 [loc1]=r5, 8 // b1 - mov r4=b2 - ;; - st8 [in0]=r6, 8 // r6 - st8 [loc1]=r4, 8 // b2 - mov r5=b3 - ;; - st8 [in0]=r7, 8 // r7 - st8 [loc1]=r5, 8 // b3 - mov r4=b4 - ;; - st8 [in0]=r8, 8 // r8 - st8 [loc1]=r4, 8 // b4 - mov r5=b5 - ;; - st8 [in0]=r9, 8 // r9 - st8 [loc1]=r5, 8 // b5 - mov r4=b6 - ;; - st8 [in0]=r10, 8 // r10 - st8 [loc1]=r5, 8 // b6 - mov r5=b7 - ;; - st8 [in0]=r11, 8 // r11 - st8 [loc1]=r5, 8 // b7 - mov r4=b0 - ;; - st8 [in0]=r12, 8 // r12 - st8 [loc1]=r4, 8 // ip - mov r5=loc0 - ;; - st8 [in0]=r13, 8 // r13 - extr.u r5=r5, 0, 38 // ar.pfs.pfm - mov r4=r0 // user mask - ;; - st8 [in0]=r14, 8 // r14 - st8 [loc1]=r5, 8 // cfm - ;; - st8 [in0]=r15, 8 // r15 - st8 [loc1]=r4, 8 // user mask - mov r5=ar.rsc - ;; - st8 [in0]=r16, 8 // r16 - st8 [loc1]=r5, 8 // ar.rsc - mov r4=ar.bsp - ;; - st8 [in0]=r17, 8 // r17 - st8 [loc1]=r4, 8 // ar.bsp - mov r5=ar.bspstore - ;; - st8 [in0]=r18, 8 // r18 - st8 [loc1]=r5, 8 // ar.bspstore - mov r4=ar.rnat - ;; - st8 [in0]=r19, 8 // r19 - st8 [loc1]=r4, 8 // ar.rnat - mov r5=ar.ccv - ;; - st8 [in0]=r20, 8 // r20 - st8 [loc1]=r5, 8 // ar.ccv - mov r4=ar.unat - ;; - st8 [in0]=r21, 8 // r21 - st8 [loc1]=r4, 8 // ar.unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r22, 8 // r22 - st8 [loc1]=r5, 8 // ar.fpsr - mov r4 = ar.unat - ;; - st8 [in0]=r23, 8 // r23 - st8 [loc1]=r4, 8 // unat - mov r5 = ar.fpsr - ;; - st8 [in0]=r24, 8 // r24 - st8 [loc1]=r5, 8 // fpsr - mov r4 = ar.pfs - ;; - st8 [in0]=r25, 8 // r25 - st8 [loc1]=r4, 8 // ar.pfs - mov r5 = ar.lc - ;; - st8 [in0]=r26, 8 // r26 - st8 [loc1]=r5, 8 // ar.lc - mov r4 = ar.ec - ;; - st8 [in0]=r27, 8 // r27 - st8 [loc1]=r4, 8 // ar.ec - mov r5 = ar.csd - ;; - st8 [in0]=r28, 8 // r28 - st8 [loc1]=r5, 8 // ar.csd - mov r4 = ar.ssd - ;; - st8 
[in0]=r29, 8 // r29 - st8 [loc1]=r4, 8 // ar.ssd - ;; - st8 [in0]=r30, 8 // r30 - ;; - st8 [in0]=r31, 8 // r31 - mov ar.pfs=loc0 - ;; - br.ret.sptk.many rp -END(ia64_dump_cpu_regs) - - diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S deleted file mode 100644 index d9d4e21107cdbe8cee21c863ff0e7f585eaa171a..0000000000000000000000000000000000000000 --- a/arch/ia64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include -#include - -#include - -OUTPUT_FORMAT("elf64-ia64-little") -OUTPUT_ARCH(ia64) -ENTRY(phys_start) -jiffies = jiffies_64; - -PHDRS { - code PT_LOAD; - percpu PT_LOAD; - data PT_LOAD; - note PT_NOTE; - unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ -} - -SECTIONS { - /* - * unwind exit sections must be discarded before - * the rest of the sections get included. - */ - /DISCARD/ : { - *(.IA_64.unwind.exit.text) - *(.IA_64.unwind_info.exit.text) - *(.comment) - *(.note) - } - - v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ - phys_start = _start - LOAD_OFFSET; - - code : { - } :code - . = KERNEL_START; - - _text = .; - _stext = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) { - __start_ivt_text = .; - *(.text..ivt) - __end_ivt_text = .; - TEXT_TEXT - SCHED_TEXT - CPUIDLE_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.gnu.linkonce.t*) - } - - .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { - *(.text2) - } - -#ifdef CONFIG_SMP - .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) { - *(.text..lock) - } -#endif - _etext = .; - - /* - * Read-only data - */ - NOTES :code :note /* put .notes in text and mark in PT_NOTE */ - code_continues : { - } : code /* switch back to regular program... */ - - EXCEPTION_TABLE(16) - - /* MCA table */ - . = ALIGN(16); - __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { - __start___mca_table = .; - *(__mca_table) - __stop___mca_table = .; - } - - .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) { - __start___phys_stack_reg_patchlist = .; - *(.data..patch.phys_stack_reg) - __end___phys_stack_reg_patchlist = .; - } - - /* - * Global data - */ - _data = .; - - /* Unwind info & table: */ - . = ALIGN(8); - .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) { - *(.IA_64.unwind_info*) - } - .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) { - __start_unwind = .; - *(.IA_64.unwind*) - __end_unwind = .; - } :code :unwind - code_continues2 : { - } : code - - RODATA - - .opd : AT(ADDR(.opd) - LOAD_OFFSET) { - __start_opd = .; - *(.opd) - __end_opd = .; - } - - /* - * Initialization code and data: - */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - - INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) - - .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) { - __start___vtop_patchlist = .; - *(.data..patch.vtop) - __end___vtop_patchlist = .; - } - - .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) { - __start___rse_patchlist = .; - *(.data..patch.rse) - __end___rse_patchlist = .; - } - - .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) { - __start___mckinley_e9_bundles = .; - *(.data..patch.mckinley_e9) - __end___mckinley_e9_bundles = .; - } - -#ifdef CONFIG_SMP - . = ALIGN(PERCPU_PAGE_SIZE); - __cpu0_per_cpu = .; - . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { - PAGE_ALIGNED_DATA(PAGE_SIZE) - . 
= ALIGN(PAGE_SIZE); - __start_gate_section = .; - *(.data..gate) - __stop_gate_section = .; - } - /* - * make sure the gate page doesn't expose - * kernel data - */ - . = ALIGN(PAGE_SIZE); - - /* Per-cpu data: */ - . = ALIGN(PERCPU_PAGE_SIZE); - PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) - __phys_per_cpu_start = __per_cpu_load; - /* - * ensure percpu data fits - * into percpu page size - */ - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; - - data : { - } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) { - _sdata = .; - INIT_TASK_DATA(PAGE_SIZE) - CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) - READ_MOSTLY_DATA(SMP_CACHE_BYTES) - DATA_DATA - *(.data1) - *(.gnu.linkonce.d*) - CONSTRUCTORS - } - - BUG_TABLE - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) { - *(.got.plt) - *(.got) - } - __gp = ADDR(.got) + 0x200000; - - /* - * We want the small data sections together, - * so single-instruction offsets can access - * them all, and initialized data all before - * uninitialized, so we can shorten the - * on-disk segment size. - */ - .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { - *(.sdata) - *(.sdata1) - *(.srdata) - } - _edata = .; - - BSS_SECTION(0, 0, 0) - - _end = .; - - code : { - } :code - - STABS_DEBUG - DWARF_DEBUG - - /* Default discards */ - DISCARDS -} diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S deleted file mode 100644 index 1a4a639dc42f05a73fff3c29b5ad26481befd91c..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/carta_random.S +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include - -#define a r2 -#define m r3 -#define lo r8 -#define hi r9 -#define t0 r16 -#define t1 r17 -#define seed r32 - -GLOBAL_ENTRY(carta_random32) - movl a = (16807 << 16) | 16807 - ;; - pmpyshr2.u t0 = a, seed, 0 - pmpyshr2.u t1 = a, seed, 16 - ;; - unpack2.l t0 = t1, t0 - dep m = -1, r0, 0, 31 - ;; - zxt4 lo = t0 - shr.u hi = t0, 32 - ;; - dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff) - ;; - shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16 - shr t1 = hi, 15 // t1 = (hi >> 15) - ;; - add lo = lo, t0 - ;; - cmp.gtu p6, p0 = lo, m - ;; -(p6) and lo = lo, m - ;; -(p6) add lo = 1, lo - ;; - add lo = lo, t1 - ;; - cmp.gtu p6, p0 = lo, m - ;; -(p6) and lo = lo, m - ;; -(p6) add lo = 1, lo - br.ret.sptk.many rp -END(carta_random32) diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S deleted file mode 100644 index 65b75085c8f46c231586745f22dc14a6aa13c11a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/clear_page.S +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2002 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - * Copyright (C) 2002 Ken Chen - * - * 1/06/01 davidm Tuned for Itanium. 
- * 2/12/02 kchen Tuned for both Itanium and McKinley - * 3/08/02 davidm Some more tweaking - */ - -#include -#include -#include - -#ifdef CONFIG_ITANIUM -# define L3_LINE_SIZE 64 // Itanium L3 line size -# define PREFETCH_LINES 9 // magic number -#else -# define L3_LINE_SIZE 128 // McKinley L3 line size -# define PREFETCH_LINES 12 // magic number -#endif - -#define saved_lc r2 -#define dst_fetch r3 -#define dst1 r8 -#define dst2 r9 -#define dst3 r10 -#define dst4 r11 - -#define dst_last r31 - -GLOBAL_ENTRY(clear_page) - .prologue - .regstk 1,0,0,0 - mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until - .save ar.lc, saved_lc - mov saved_lc = ar.lc - - .body - mov ar.lc = (PREFETCH_LINES - 1) - mov dst_fetch = in0 - adds dst1 = 16, in0 - adds dst2 = 32, in0 - ;; -.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - adds dst3 = 48, in0 // executing this multiple times is harmless - br.cloop.sptk.few .fetch - ;; - addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch - mov ar.lc = r16 // one L3 line per iteration - adds dst4 = 64, in0 - ;; -#ifdef CONFIG_ITANIUM - // Optimized for Itanium -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - cmp.lt p8,p0=dst_fetch, dst_last - ;; -#else - // Optimized for McKinley -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - stf.spill.nta [dst3] = f0, 64 - stf.spill.nta [dst4] = f0, 128 - cmp.lt p8,p0=dst_fetch, dst_last - ;; - stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 -#endif - stf.spill.nta [dst3] = f0, 64 -(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - br.cloop.sptk.few 1b - ;; - mov ar.lc = saved_lc // restore lc - br.ret.sptk.many rp -END(clear_page) -EXPORT_SYMBOL(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S deleted file mode 100644 index a28f39d349ebeabb784962f18f3df8df0ae220ff..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/clear_user.S +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This routine clears to zero a linear memory buffer in user space. - * - * Inputs: - * in0: address of buffer - * in1: length of buffer in bytes - * Outputs: - * r8: number of bytes that didn't get cleared due to a fault - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - */ - -#include -#include - -// -// arguments -// -#define buf r32 -#define len r33 - -// -// local registers -// -#define cnt r16 -#define buf2 r17 -#define saved_lc r18 -#define saved_pfs r19 -#define tmp r20 -#define len2 r21 -#define len3 r22 - -// -// Theory of operations: -// - we check whether or not the buffer is small, i.e., less than 17 -// in which case we do the byte by byte loop. -// -// - Otherwise we go progressively from 1 byte store to 8byte store in -// the head part, the body is a 16byte store loop and we finish we the -// tail for the last 15 bytes. -// The good point about this breakdown is that the long buffer handling -// contains only 2 branches. -// -// The reason for not using shifting & masking for both the head and the -// tail is to stay semantically correct. This routine is not supposed -// to write bytes outside of the buffer. While most of the time this would -// be ok, we can't tolerate a mistake. A classical example is the case -// of multithreaded code were to the extra bytes touched is actually owned -// by another thread which runs concurrently to ours. 
Another, less likely, -// example is with device drivers where reading an I/O mapped location may -// have side effects (same thing for writing). -// - -GLOBAL_ENTRY(__do_clear_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,0,0,0 - cmp.eq p6,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) - .body - ;; // avoid WAW on CFM - adds tmp=-1,len // br.ctop is repeat/until - mov ret0=len // return value is length at this point -(p6) br.ret.spnt.many rp - ;; - cmp.lt p6,p0=16,len // if len > 16 then long memset - mov ar.lc=tmp // initialize lc for small count -(p6) br.cond.dptk .long_do_clear - ;; // WAR on ar.lc - // - // worst case 16 iterations, avg 8 iterations - // - // We could have played with the predicates to use the extra - // M slot for 2 stores/iteration but the cost the initialization - // the various counters compared to how long the loop is supposed - // to last on average does not make this solution viable. - // -1: - EX( .Lexit1, st1 [buf]=r0,1 ) - adds len=-1,len // countdown length using len - br.cloop.dptk 1b - ;; // avoid RAW on ar.lc - // - // .Lexit4: comes from byte by byte loop - // len contains bytes left -.Lexit1: - mov ret0=len // faster than using ar.lc - mov ar.lc=saved_lc - br.ret.sptk.many rp // end of short clear_user - - - // - // At this point we know we have more than 16 bytes to copy - // so we focus on alignment (no branches required) - // - // The use of len/len2 for countdown of the number of bytes left - // instead of ret0 is due to the fact that the exception code - // changes the values of r8. - // -.long_do_clear: - tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) - ;; - EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned -(p6) adds len=-1,len;; // sync because buf is modified - tbit.nz p6,p0=buf,1 - ;; - EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned -(p6) adds len=-2,len;; - tbit.nz p6,p0=buf,2 - ;; - EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned -(p6) adds len=-4,len;; - tbit.nz p6,p0=buf,3 - ;; - EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned -(p6) adds len=-8,len;; - shr.u cnt=len,4 // number of 128-bit (2x64bit) words - ;; - cmp.eq p6,p0=r0,cnt - adds tmp=-1,cnt -(p6) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds buf2=8,buf // setup second base pointer - mov ar.lc=tmp - ;; - - // - // 16bytes/iteration core loop - // - // The second store can never generate a fault because - // we come into the loop only when we are 16-byte aligned. - // This means that if we cross a page then it will always be - // in the first store and never in the second. - // - // - // We need to keep track of the remaining length. A possible (optimistic) - // way would be to use ar.lc and derive how many byte were left by - // doing : left= 16*ar.lc + 16. this would avoid the addition at - // every iteration. - // However we need to keep the synchronization point. A template - // M;;MB does not exist and thus we can keep the addition at no - // extra cycle cost (use a nop slot anyway). It also simplifies the - // (unlikely) error recovery code - // - -2: EX(.Lexit3, st8 [buf]=r0,16 ) - ;; // needed to get len correct when error - st8 [buf2]=r0,16 - adds len=-16,len - br.cloop.dptk 2b - ;; - mov ar.lc=saved_lc - // - // tail correction based on len only - // - // We alternate the use of len3,len2 to allow parallelism and correct - // error handling. We also reuse p6/p7 to return correct value. 
- // The addition of len2/len3 does not cost anything more compared to - // the regular memset as we had empty slots. - // -.dotail: - mov len2=len // for parallelization of error handling - mov len3=len - tbit.nz p6,p0=len,3 - ;; - EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes -(p6) adds len3=-8,len2 - tbit.nz p7,p6=len,2 - ;; - EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes -(p7) adds len2=-4,len3 - tbit.nz p6,p7=len,1 - ;; - EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes -(p6) adds len3=-2,len2 - tbit.nz p7,p6=len,0 - ;; - EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left - mov ret0=r0 // success - br.ret.sptk.many rp // end of most likely path - - // - // Outlined error handling code - // - - // - // .Lexit3: comes from core loop, need restore pr/lc - // len contains bytes left - // - // - // .Lexit2: - // if p6 -> coming from st8 or st2 : len2 contains what's left - // if p7 -> coming from st4 or st1 : len3 contains what's left - // We must restore lc/pr even though might not have been used. -.Lexit2: - .pred.rel "mutex", p6, p7 -(p6) mov len=len2 -(p7) mov len=len3 - ;; - // - // .Lexit4: comes from head, need not restore pr/lc - // len contains bytes left - // -.Lexit3: - mov ret0=len - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(__do_clear_user) -EXPORT_SYMBOL(__do_clear_user) diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S deleted file mode 100644 index 176f857c522e8a9d620026db1903c0e36206439a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/copy_page.S +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard copy_page() function - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger - * - * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. 
- */ -#include -#include -#include - -#define PIPE_DEPTH 3 -#define EPI p[PIPE_DEPTH-1] - -#define lcount r16 -#define saved_pr r17 -#define saved_lc r18 -#define saved_pfs r19 -#define src1 r20 -#define src2 r21 -#define tgt1 r22 -#define tgt2 r23 -#define srcf r24 -#define tgtf r25 -#define tgt_last r26 - -#define Nrot ((8*PIPE_DEPTH+7)&~7) - -GLOBAL_ENTRY(copy_page) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - - .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ - t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - .save ar.lc, saved_lc - mov saved_lc=ar.lc - mov ar.ec=PIPE_DEPTH - - mov lcount=PAGE_SIZE/64-1 - .save pr, saved_pr - mov saved_pr=pr - mov pr.rot=1<<16 - - .body - - mov src1=in1 - adds src2=8,in1 - mov tgt_last = PAGE_SIZE - ;; - adds tgt2=8,in0 - add srcf=512,in1 - mov ar.lc=lcount - mov tgt1=in0 - add tgtf=512,in0 - add tgt_last = tgt_last, in0 - ;; -1: -(p[0]) ld8 t1[0]=[src1],16 -(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 -(p[0]) ld8 t2[0]=[src2],16 -(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 - cmp.ltu p6,p0 = tgtf, tgt_last - ;; -(p[0]) ld8 t3[0]=[src1],16 -(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 -(p[0]) ld8 t4[0]=[src2],16 -(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t5[0]=[src1],16 -(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 -(p[0]) ld8 t6[0]=[src2],16 -(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 - ;; -(p[0]) ld8 t7[0]=[src1],16 -(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 -(p[0]) ld8 t8[0]=[src2],16 -(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 - -(p6) lfetch [srcf], 64 -(p6) lfetch [tgtf], 64 - br.ctop.sptk.few 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 // restore predicates - mov ar.pfs=saved_pfs - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(copy_page) -EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S deleted file mode 100644 index d6fd56e4f1c1dcac5965ce4cb03516a7f15d4587..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/copy_page_mck.S +++ /dev/null @@ -1,188 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * McKinley-optimized version of copy_page(). - * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * General idea: - * - use regular loads and stores to prefetch data to avoid consuming M-slot just for - * lfetches => good for in-cache performance - * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single - * cycle - * - * Principle of operation: - * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. - * To avoid secondary misses in L2, we prefetch both source and destination with a line-size - * of 128 bytes. When both of these lines are in the L2 and the first half of the - * source line is in L1, we start copying the remaining words. The second half of the - * source line is prefetched in an earlier iteration, so that by the time we start - * accessing it, it's also present in the L1. - * - * We use a software-pipelined loop to control the overall operation. The pipeline - * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching - * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination - * cache-lines, the last K stages are used to copy the cache-line words not copied by - * the prefetches. 
The four relevant points in the pipelined are called A, B, C, D: - * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line - * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought - * into L1D and p[D] is TRUE if a cacheline needs to be copied. - * - * This all sounds very complicated, but thanks to the modulo-scheduled loop support, - * the resulting code is very regular and quite easy to follow (once you get the idea). - * - * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented - * as the separate .prefetch_loop. Logically, this loop performs exactly like the - * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, - * so that each loop iteration is faster (again, good for cached case). - * - * When reading the code, it helps to keep the following picture in mind: - * - * word 0 word 1 - * +------+------+--- - * | v[x] | t1 | ^ - * | t2 | t3 | | - * | t4 | t5 | | - * | t6 | t7 | | 128 bytes - * | n[y] | t9 | | (L2 cache line) - * | t10 | t11 | | - * | t12 | t13 | | - * | t14 | t15 | v - * +------+------+--- - * - * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] - * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in - * an order that avoids bank conflicts. - */ -#include -#include -#include - -#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) - -#define src0 r2 -#define src1 r3 -#define dst0 r9 -#define dst1 r10 -#define src_pre_mem r11 -#define dst_pre_mem r14 -#define src_pre_l2 r15 -#define dst_pre_l2 r16 -#define t1 r17 -#define t2 r18 -#define t3 r19 -#define t4 r20 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! -#define t13 r21 -#define t15 r22 - -#define saved_lc r23 -#define saved_pr r24 - -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 3) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -GLOBAL_ENTRY(copy_page) - .prologue - alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot - - .rotr v[2*PREFETCH_DIST], n[D-C+1] - .rotp p[N] - - .save ar.lc, saved_lc - mov saved_lc = ar.lc - .save pr, saved_pr - mov saved_pr = pr - .body - - mov src_pre_mem = in1 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = in0 - mov ar.lc = 2*PREFETCH_DIST - 1 - - add src_pre_l2 = 8*8, in1 - add dst_pre_l2 = 8*8, in0 - add src0 = 8, in1 // first t1 src - add src1 = 3*8, in1 // first t3 src - add dst0 = 8, in0 // first t1 dst - add dst1 = 3*8, in0 // first t3 dst - mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 - nop.m 0 - nop.i 0 - ;; - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) - mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! 
- mov ar.ec = N // # of stages in pipeline
- ;;
-.line_copy:
-(p[D]) ld8 t2 = [src0], 3*8 // M0
-(p[D]) ld8 t4 = [src1], 3*8 // M1
-(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
-(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
- ;;
-(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
-(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
-(p[D]) st8 [dst0] = t1, 8 // M2
-(p[D]) st8 [dst1] = t3, 8 // M3
- ;;
-(p[D]) ld8 t5 = [src0], 8
-(p[D]) ld8 t7 = [src1], 3*8
-(p[D]) st8 [dst0] = t2, 3*8
-(p[D]) st8 [dst1] = t4, 3*8
- ;;
-(p[D]) ld8 t6 = [src0], 3*8
-(p[D]) ld8 t10 = [src1], 8
-(p[D]) st8 [dst0] = t5, 8
-(p[D]) st8 [dst1] = t7, 3*8
- ;;
-(p[D]) ld8 t9 = [src0], 3*8
-(p[D]) ld8 t11 = [src1], 3*8
-(p[D]) st8 [dst0] = t6, 3*8
-(p[D]) st8 [dst1] = t10, 8
- ;;
-(p[D]) ld8 t12 = [src0], 8
-(p[D]) ld8 t14 = [src1], 8
-(p[D]) st8 [dst0] = t9, 3*8
-(p[D]) st8 [dst1] = t11, 3*8
- ;;
-(p[D]) ld8 t13 = [src0], 4*8
-(p[D]) ld8 t15 = [src1], 4*8
-(p[D]) st8 [dst0] = t12, 8
-(p[D]) st8 [dst1] = t14, 8
- ;;
-(p[D-1])ld8 t1 = [src0], 8
-(p[D-1])ld8 t3 = [src1], 8
-(p[D]) st8 [dst0] = t13, 4*8
-(p[D]) st8 [dst1] = t15, 4*8
- br.ctop.sptk .line_copy
- ;;
- mov ar.lc = saved_lc
- mov pr = saved_pr, -1
- br.ret.sptk.many rp
-END(copy_page)
-EXPORT_SYMBOL(copy_page)
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
deleted file mode 100644
index f681556c6b86d99525c3674daa071ff4d5c3e6d5..0000000000000000000000000000000000000000
--- a/arch/ia64/lib/copy_user.S
+++ /dev/null
@@ -1,613 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *
- * Optimized version of the copy_user() routine.
- * It is used to copy data across the kernel/user boundary.
- *
- * The source and destination are always on opposite sides of
- * the boundary. When reading from user space we must catch
- * faults on loads. When writing to user space we must catch
- * errors on stores. Note that because of the nature of the copy
- * we don't need to worry about overlapping regions.
- *
- *
- * Inputs:
- * in0 address of destination buffer
- * in1 address of source buffer
- * in2 number of bytes to copy
- *
- * Outputs:
- * ret0 0 in case of success. The number of bytes NOT copied in
- * case of error.
- *
- * Copyright (C) 2000-2001 Hewlett-Packard Co
- * Stephane Eranian
- *
- * Fixme:
- * - handle the case where we have more than 16 bytes and the alignments
- * are different.
- * - more benchmarking
- * - fix extraneous stop bit introduced by the EX() macro.
- */ - -#include -#include - -// -// Tuneable parameters -// -#define COPY_BREAK 16 // we do byte copy below (must be >=16) -#define PIPE_DEPTH 21 // pipe depth - -#define EPI p[PIPE_DEPTH-1] - -// -// arguments -// -#define dst in0 -#define src in1 -#define len in2 - -// -// local registers -// -#define t1 r2 // rshift in bytes -#define t2 r3 // lshift in bytes -#define rshift r14 // right shift in bits -#define lshift r15 // left shift in bits -#define word1 r16 -#define word2 r17 -#define cnt r18 -#define len2 r19 -#define saved_lc r20 -#define saved_pr r21 -#define tmp r22 -#define val r23 -#define src1 r24 -#define dst1 r25 -#define src2 r26 -#define dst2 r27 -#define len1 r28 -#define enddst r29 -#define endsrc r30 -#define saved_pfs r31 - -GLOBAL_ENTRY(__copy_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) - - .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] - .rotp p[PIPE_DEPTH] - - adds len2=-1,len // br.ctop is repeat/until - mov ret0=r0 - - ;; // RAW of cfm when len=0 - cmp.eq p8,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) -(p8) br.ret.spnt.many rp // empty mempcy() - ;; - add enddst=dst,len // first byte after end of source - add endsrc=src,len // first byte after end of destination - .save pr, saved_pr - mov saved_pr=pr // preserve predicates - - .body - - mov dst1=dst // copy because of rotation - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - - mov src1=src // copy because of rotation - mov ar.lc=len2 // initialize lc for small count - cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy - - xor tmp=src,dst // same alignment test prepare -(p10) br.cond.dptk .long_copy_user - ;; // RAW pr.rot/p16 ? - // - // Now we do the byte by byte loop with software pipeline - // - // p7 is necessarily false by now -1: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 1b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs // restore ar.ec - br.ret.sptk.many rp // end of short memcpy - - // - // Not 8-byte aligned - // -.diff_align_copy_user: - // At this point we know we have more than 16 bytes to copy - // and also that src and dest do _not_ have the same alignment. - and src2=0x7,src1 // src offset - and dst2=0x7,dst1 // dst offset - ;; - // The basic idea is that we copy byte-by-byte at the head so - // that we can reach 8-byte alignment for both src1 and dst1. - // Then copy the body using software pipelined 8-byte copy, - // shifting the two back-to-back words right and left, then copy - // the tail by copying byte-by-byte. - // - // Fault handling. If the byte-by-byte at the head fails on the - // load, then restart and finish the pipleline by copying zeros - // to the dst1. Then copy zeros for the rest of dst1. - // If 8-byte software pipeline fails on the load, do the same as - // failure_in3 does. If the byte-by-byte at the tail fails, it is - // handled simply by failure_in_pipe1. - // - // The case p14 represents the source has more bytes in the - // the first word (by the shifted part), whereas the p15 needs to - // copy some bytes from the 2nd word of the source that has the - // tail of the 1st of the destination. - // - - // - // Optimization. If dst1 is 8-byte aligned (quite common), we don't need - // to copy the head to dst1, to start 8-byte copy software pipeline. 
- // We know src1 is not 8-byte aligned in this case. - // - cmp.eq p14,p15=r0,dst2 -(p15) br.cond.spnt 1f - ;; - sub t1=8,src2 - mov t2=src2 - ;; - shl rshift=t2,3 - sub len1=len,t1 // set len1 - ;; - sub lshift=64,rshift - ;; - br.cond.spnt .word_copy_user - ;; -1: - cmp.leu p14,p15=src2,dst2 - sub t1=dst2,src2 - ;; - .pred.rel "mutex", p14, p15 -(p14) sub word1=8,src2 // (8 - src offset) -(p15) sub t1=r0,t1 // absolute value -(p15) sub word1=8,dst2 // (8 - dst offset) - ;; - // For the case p14, we don't need to copy the shifted part to - // the 1st word of destination. - sub t2=8,t1 -(p14) sub word1=word1,t1 - ;; - sub len1=len,word1 // resulting len -(p15) shl rshift=t1,3 // in bits -(p14) shl rshift=t2,3 - ;; -(p14) sub len1=len1,t1 - adds cnt=-1,word1 - ;; - sub lshift=64,rshift - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=cnt - ;; -2: - EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 2b - ;; - clrrrb - ;; -.word_copy_user: - cmp.gtu p9,p0=16,len1 -(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy - ;; - shr.u cnt=len1,3 // number of 64-bit words - ;; - adds cnt=-1,cnt - ;; - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t2 -(p15) sub src1=src1,t1 - // - // Now both src1 and dst1 point to an 8-byte aligned address. And - // we have more than 8 bytes to copy. - // - mov ar.lc=cnt - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - ;; -3: - // - // The pipleline consists of 3 stages: - // 1 (p16): Load a word from src1 - // 2 (EPI_1): Shift right pair, saving to tmp - // 3 (EPI): Store tmp to dst1 - // - // To make it simple, use at least 2 (p16) loops to set up val1[n] - // because we need 2 back-to-back val1[] to get tmp. - // Note that this implies EPI_2 must be p18 or greater. - // - -#define EPI_1 p[PIPE_DEPTH-2] -#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift -#define CASE(pred, shift) \ - (pred) br.cond.spnt .copy_user_bit##shift -#define BODY(rshift) \ -.copy_user_bit##rshift: \ -1: \ - EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ - EX(3f,(p16) ld8 val1[1]=[src1],8); \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 1b; \ - ;; \ - br.cond.sptk.many .diff_align_do_tail; \ -2: \ -(EPI) st8 [dst1]=tmp,8; \ -(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ -3: \ -(p16) mov val1[1]=r0; \ -(p16) mov val1[0]=r0; \ - br.ctop.dptk 2b; \ - ;; \ - br.cond.sptk.many .failure_in2 - - // - // Since the instruction 'shrp' requires a fixed 128-bit value - // specifying the bits to shift, we need to provide 7 cases - // below. - // - SWITCH(p6, 8) - SWITCH(p7, 16) - SWITCH(p8, 24) - SWITCH(p9, 32) - SWITCH(p10, 40) - SWITCH(p11, 48) - SWITCH(p12, 56) - ;; - CASE(p6, 8) - CASE(p7, 16) - CASE(p8, 24) - CASE(p9, 32) - CASE(p10, 40) - CASE(p11, 48) - CASE(p12, 56) - ;; - BODY(8) - BODY(16) - BODY(24) - BODY(32) - BODY(40) - BODY(48) - BODY(56) - ;; -.diff_align_do_tail: - .pred.rel "mutex", p14, p15 -(p14) sub src1=src1,t1 -(p14) adds dst1=-8,dst1 -(p15) sub dst1=dst1,t1 - ;; -4: - // Tail correction. - // - // The problem with this piplelined loop is that the last word is not - // loaded and thus parf of the last word written is not correct. - // To fix that, we simply copy the tail byte by byte. 
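Setting the tail fix-up aside for a moment, the seven BODY(rshift) cases generated above all perform the same operation. A plain C sketch of that operation, assuming a little-endian layout and an rshift equal to one of the seven values 8..56 dispatched by the SWITCH/CASE table (so the C shifts stay in range); the function name is invented:

#include <stddef.h>
#include <stdint.h>

/* Copy nwords 8-byte words to an aligned destination from an aligned
 * load stream whose useful data starts rshift bits into src[0]: each
 * output word pairs the top of one input word with the bottom of the
 * next, which is what shrp does in a single instruction.  Note the
 * loop reads src[0..nwords], one word beyond the data copied, just
 * as the pipelined loop above loads one word ahead. */
static void shift_pair_copy(uint64_t *dst, const uint64_t *src,
			    size_t nwords, unsigned int rshift)
{
	unsigned int lshift = 64 - rshift;
	uint64_t lo = src[0];
	size_t i;

	for (i = 0; i < nwords; i++) {
		uint64_t hi = src[i + 1];
		dst[i] = (lo >> rshift) | (hi << lshift);	/* shrp */
		lo = hi;
	}
}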
- - sub len1=endsrc,src1,1 - clrrrb - ;; - mov ar.ec=PIPE_DEPTH - mov pr.rot=1<<16 // p16=true all others are false - mov ar.lc=len1 - ;; -5: - EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) - EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) - br.ctop.dptk.few 5b - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Beginning of long mempcy (i.e. > 16 bytes) - // -.long_copy_user: - tbit.nz p6,p7=src1,0 // odd alignment - and tmp=7,tmp - ;; - cmp.eq p10,p8=r0,tmp - mov len1=len // copy because of rotation -(p8) br.cond.dpnt .diff_align_copy_user - ;; - // At this point we know we have more than 16 bytes to copy - // and also that both src and dest have the same alignment - // which may not be the one we want. So for now we must move - // forward slowly until we reach 16byte alignment: no need to - // worry about reaching the end of buffer. - // - EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned -(p6) adds len1=-1,len1;; - tbit.nz p7,p0=src1,1 - ;; - EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned -(p7) adds len1=-2,len1;; - tbit.nz p8,p0=src1,2 - ;; - // - // Stop bit not required after ld4 because if we fail on ld4 - // we have never executed the ld1, therefore st1 is not executed. - // - EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned - ;; - EX(.failure_out,(p6) st1 [dst1]=val1[0],1) - tbit.nz p9,p0=src1,3 - ;; - // - // Stop bit not required after ld8 because if we fail on ld8 - // we have never executed the ld2, therefore st2 is not executed. - // - EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned - EX(.failure_out,(p7) st2 [dst1]=val1[1],2) -(p8) adds len1=-4,len1 - ;; - EX(.failure_out, (p8) st4 [dst1]=val2[0],4) -(p9) adds len1=-8,len1;; - shr.u cnt=len1,4 // number of 128-bit (2x64bit) words - ;; - EX(.failure_out, (p9) st8 [dst1]=val2[1],8) - tbit.nz p6,p0=len1,3 - cmp.eq p7,p0=r0,cnt - adds tmp=-1,cnt // br.ctop is repeat/until -(p7) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds src2=8,src1 - adds dst2=8,dst1 - mov ar.lc=tmp - ;; - // - // 16bytes/iteration - // -2: - EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) -(p16) ld8 val2[0]=[src2],16 - - EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; // RAW on src1 when fall through from loop - // - // Tail correction based on len only - // - // No matter where we come from (loop or test) the src1 pointer - // is 16 byte aligned AND we have less than 16 bytes to copy. - // -.dotail: - EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes - tbit.nz p7,p0=len1,2 - ;; - EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes - tbit.nz p8,p0=len1,1 - ;; - EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes - tbit.nz p9,p0=len1,0 - ;; - EX(.failure_out, (p6) st8 [dst1]=val1[0],8) - ;; - EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left - mov ar.lc=saved_lc - ;; - EX(.failure_out,(p7) st4 [dst1]=val1[1],4) - mov pr=saved_pr,0xffffffffffff0000 - ;; - EX(.failure_out, (p8) st2 [dst1]=val2[0],2) - mov ar.pfs=saved_pfs - ;; - EX(.failure_out, (p9) st1 [dst1]=val2[1]) - br.ret.sptk.many rp - - - // - // Here we handle the case where the byte by byte copy fails - // on the load. 
- // Several factors make the zeroing of the rest of the buffer kind of - // tricky: - // - the pipeline: loads/stores are not in sync (pipeline) - // - // In the same loop iteration, the dst1 pointer does not directly - // reflect where the faulty load was. - // - // - pipeline effect - // When you get a fault on load, you may have valid data from - // previous loads not yet store in transit. Such data must be - // store normally before moving onto zeroing the rest. - // - // - single/multi dispersal independence. - // - // solution: - // - we don't disrupt the pipeline, i.e. data in transit in - // the software pipeline will be eventually move to memory. - // We simply replace the load with a simple mov and keep the - // pipeline going. We can't really do this inline because - // p16 is always reset to 1 when lc > 0. - // -.failure_in_pipe1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -1: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 1b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // This is the case where the byte by byte copy fails on the load - // when we copy the head. We need to finish the pipeline and copy - // zeros for the rest of the destination. Since this happens - // at the top we still need to fill the body and tail. -.failure_in_pipe2: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied -2: -(p16) mov val1[0]=r0 -(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 - br.ctop.dptk 2b - ;; - sub len=enddst,dst1,1 // precompute len - br.cond.dptk.many .failure_in1bis - ;; - - // - // Here we handle the head & tail part when we check for alignment. - // The following code handles only the load failures. The - // main diffculty comes from the fact that loads/stores are - // scheduled. So when you fail on a load, the stores corresponding - // to previous successful loads must be executed. - // - // However some simplifications are possible given the way - // things work. - // - // 1) HEAD - // Theory of operation: - // - // Page A | Page B - // ---------|----- - // 1|8 x - // 1 2|8 x - // 4|8 x - // 1 4|8 x - // 2 4|8 x - // 1 2 4|8 x - // |1 - // |2 x - // |4 x - // - // page_size >= 4k (2^12). (x means 4, 2, 1) - // Here we suppose Page A exists and Page B does not. - // - // As we move towards eight byte alignment we may encounter faults. - // The numbers on each page show the size of the load (current alignment). - // - // Key point: - // - if you fail on 1, 2, 4 then you have never executed any smaller - // size loads, e.g. failing ld4 means no ld1 nor ld2 executed - // before. - // - // This allows us to simplify the cleanup code, because basically you - // only have to worry about "pending" stores in the case of a failing - // ld8(). Given the way the code is written today, this means only - // worry about st2, st4. There we can use the information encapsulated - // into the predicates. - // - // Other key point: - // - if you fail on the ld8 in the head, it means you went straight - // to it, i.e. 8byte alignment within an unexisting page. - // Again this comes from the fact that if you crossed just for the ld8 then - // you are 8byte aligned but also 16byte align, therefore you would - // either go for the 16byte copy loop OR the ld8 in the tail part. - // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible - // because it would mean you had 15bytes to copy in which case you - // would have defaulted to the byte by byte copy. 
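Before the TAIL case, the head crawl itself is small enough to model in C. The helper below is an invention of this sketch; the point it illustrates is that the 1-, 2- and 4-byte copies are tried smallest-first and each access is naturally aligned, so a given load can only fault if it lies entirely in the missing page, which is what the table above encodes.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Crawl to an 8-byte-aligned source with at most one 1-byte, one
 * 2-byte and one 4-byte copy (the ld1/ld2/ld4 sequence above).
 * Returns how many bytes were consumed. */
static size_t head_align_model(const unsigned char **src,
			       unsigned char **dst)
{
	size_t done = 0;
	size_t step;

	for (step = 1; step <= 4; step *= 2) {
		if ((uintptr_t)*src & step) {
			memcpy(*dst, *src, step);
			*src += step;
			*dst += step;
			done += step;
		}
	}
	return done;
}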
- // - // - // 2) TAIL - // Here we now we have less than 16 bytes AND we are either 8 or 16 byte - // aligned. - // - // Key point: - // This means that we either: - // - are right on a page boundary - // OR - // - are at more than 16 bytes from a page boundary with - // at most 15 bytes to copy: no chance of crossing. - // - // This allows us to assume that if we fail on a load we haven't possibly - // executed any of the previous (tail) ones, so we don't need to do - // any stores. For instance, if we fail on ld2, this means we had - // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. - // - // This means that we are in a situation similar the a fault in the - // head part. That's nice! - // -.failure_in1: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - sub len=endsrc,src1,1 - // - // we know that ret0 can never be zero at this point - // because we failed why trying to do a load, i.e. there is still - // some work to do. - // The failure_in1bis and length problem is taken care of at the - // calling side. - // - ;; -.failure_in1bis: // from (.failure_in3) - mov ar.lc=len // Continue with a stupid byte store. - ;; -5: - st1 [dst1]=r0,1 - br.cloop.dptk 5b - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // Here we simply restart the loop but instead - // of doing loads we fill the pipeline with zeroes - // We can't simply store r0 because we may have valid - // data in transit in the pipeline. - // ar.lc and ar.ec are setup correctly at this point - // - // we MUST use src1/endsrc here and not dst1/enddst because - // of the pipeline effect. - // -.failure_in3: - sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied - ;; -2: -(p16) mov val1[0]=r0 -(p16) mov val2[0]=r0 -(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 -(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 - br.ctop.dptk 2b - ;; - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? - sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - -.failure_in2: - sub ret0=endsrc,src1 - cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? - sub len=enddst,dst1,1 // precompute len -(p6) br.cond.dptk .failure_in1bis - ;; - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - // - // handling of failures on stores: that's the easy part - // -.failure_out: - sub ret0=enddst,dst1 - mov pr=saved_pr,0xffffffffffff0000 - mov ar.lc=saved_lc - - mov ar.pfs=saved_pfs - br.ret.sptk.many rp -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S deleted file mode 100644 index 6004dad2597c3e3842052344306594455c4c06ab..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/do_csum.S +++ /dev/null @@ -1,324 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optmized version of the standard do_csum() function - * - * Return: a 64bit quantity containing the 16bit Internet checksum - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co - * Stephane Eranian - * - * 02/04/22 Ken Chen - * Data locality study on the checksum buffer. - * More optimization cleanup - remove excessive stop bits. - * 02/04/08 David Mosberger - * More cleanup and tuning. 
- * 01/04/18 Jun Nakajima - * Clean up and optimize and the software pipeline, loading two - * back-to-back 8-byte words per loop. Clean up the initialization - * for the loop. Support the cases where load latency = 1 or 2. - * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). - */ - -#include - -// -// Theory of operations: -// The goal is to go as quickly as possible to the point where -// we can checksum 16 bytes/loop. Before reaching that point we must -// take care of incorrect alignment of first byte. -// -// The code hereafter also takes care of the "tail" part of the buffer -// before entering the core loop, if any. The checksum is a sum so it -// allows us to commute operations. So we do the "head" and "tail" -// first to finish at full speed in the body. Once we get the head and -// tail values, we feed them into the pipeline, very handy initialization. -// -// Of course we deal with the special case where the whole buffer fits -// into one 8 byte word. In this case we have only one entry in the pipeline. -// -// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for -// possible load latency and also to accommodate for head and tail. -// -// The end of the function deals with folding the checksum from 64bits -// down to 16bits taking care of the carry. -// -// This version avoids synchronization in the core loop by also using a -// pipeline for the accumulation of the checksum in resultx[] (x=1,2). -// -// wordx[] (x=1,2) -// |---| -// | | 0 : new value loaded in pipeline -// |---| -// | | - : in transit data -// |---| -// | | LOAD_LATENCY : current value to add to checksum -// |---| -// | | LOAD_LATENCY+1 : previous value added to checksum -// |---| (previous iteration) -// -// resultx[] (x=1,2) -// |---| -// | | 0 : initial value -// |---| -// | | LOAD_LATENCY-1 : new checksum -// |---| -// | | LOAD_LATENCY : previous value of checksum -// |---| -// | | LOAD_LATENCY+1 : final checksum when out of the loop -// |---| -// -// -// See RFC1071 "Computing the Internet Checksum" for various techniques for -// calculating the Internet checksum. -// -// NOT YET DONE: -// - Maybe another algorithm which would take care of the folding at the -// end in a different manner -// - Work with people more knowledgeable than me on the network stack -// to figure out if we could not split the function depending on the -// type of packet or alignment we get. Like the ip_fast_csum() routine -// where we know we have at least 20bytes worth of data to checksum. -// - Do a better job of handling small packets. -// - Note on prefetching: it was found that under various load, i.e. ftp read/write, -// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% -// on the data that buffer points to (partly because the checksum is often preceded by -// a copy_from_user()). This finding indiate that lfetch will not be beneficial since -// the data is already in the cache. -// - -#define saved_pfs r11 -#define hmask r16 -#define tmask r17 -#define first1 r18 -#define firstval r19 -#define firstoff r20 -#define last r21 -#define lastval r22 -#define lastoff r23 -#define saved_lc r24 -#define saved_pr r25 -#define tmp1 r26 -#define tmp2 r27 -#define tmp3 r28 -#define carry1 r29 -#define carry2 r30 -#define first2 r31 - -#define buf in0 -#define len in1 - -#define LOAD_LATENCY 2 // XXX fix me - -#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) -# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." 
-#endif - -#define PIPE_DEPTH (LOAD_LATENCY+2) -#define ELD p[LOAD_LATENCY] // end of load -#define ELD_1 p[LOAD_LATENCY+1] // and next stage - -// unsigned long do_csum(unsigned char *buf,long len) - -GLOBAL_ENTRY(do_csum) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,16,0,16 - .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] - .rotp p[PIPE_DEPTH], pC1[2], pC2[2] - mov ret0=r0 // in case we have zero length - cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) - ;; - add tmp1=buf,len // last byte's address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) -(p6) br.ret.spnt.many rp // return if zero or negative length - - mov hmask=-1 // initialize head mask - tbit.nz p15,p0=buf,0 // is buf an odd address? - and first1=-8,buf // 8-byte align down address of first1 element - - and firstoff=7,buf // how many bytes off for first1 element - mov tmask=-1 // initialize tail mask - - ;; - adds tmp2=-1,tmp1 // last-1 - and lastoff=7,tmp1 // how many bytes off for last element - ;; - sub tmp1=8,lastoff // complement to lastoff - and last=-8,tmp2 // address of word containing last byte - ;; - sub tmp3=last,first1 // tmp3=distance from first1 to last - .save ar.lc, saved_lc - mov saved_lc=ar.lc // save lc - cmp.eq p8,p9=last,first1 // everything fits in one word ? - - ld8 firstval=[first1],8 // load, ahead of time, "first1" word - and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 - shl tmp2=firstoff,3 // number of bits - ;; -(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed - shl tmp1=tmp1,3 // number of bits -(p9) adds tmp3=-8,tmp3 // effectively loaded - ;; -(p8) mov lastval=r0 // we don't need lastval if first1==last - shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ - shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] - ;; - .body -#define count tmp3 - -(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only -(p9) and word2[0]=lastval,tmask // mask last it as appropriate - shr.u count=count,3 // how many 8-byte? - ;; - // If count is odd, finish this 8-byte word so that we can - // load two back-to-back 8-byte words per loop thereafter. - and word1[0]=firstval,hmask // and mask it as appropriate - tbit.nz p10,p11=count,0 // if (count is odd) - ;; -(p8) mov result1[0]=word1[0] -(p9) add result1[0]=word1[0],word2[0] - ;; - cmp.ltu p6,p0=result1[0],word1[0] // check the carry - cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte - ;; -(p6) adds result1[0]=1,result1[0] -(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) -(p11) br.cond.dptk .do_csum16 // if (count is even) - - // Here count is odd. - ld8 word1[1]=[first1],8 // load an 8-byte word - cmp.eq p9,p10=1,count // if (count == 1) - adds count=-1,count // loaded an 8-byte word - ;; - add result1[0]=result1[0],word1[1] - ;; - cmp.ltu p6,p0=result1[0],word1[1] - ;; -(p6) adds result1[0]=1,result1[0] -(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit - // Fall through to calculate the checksum, feeding result1[0] as - // the initial value in result1[0]. - // - // Calculate the checksum loading two 8-byte words per loop. - // -.do_csum16: - add first2=8,first1 - shr.u count=count,1 // we do 16 bytes per loop - ;; - adds count=-1,count - mov carry1=r0 - mov carry2=r0 - brp.loop.imp 1f,2f - ;; - mov ar.ec=PIPE_DEPTH - mov ar.lc=count // set lc - mov pr.rot=1<<16 - // result1[0] must be initialized in advance. 
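Just before the unrolled loop, it may help to see the arithmetic it implements written out in plain C: one's-complement addition of 8-byte words with an end-around carry, then folding the 64-bit sum down to 16 bits. The sketch ignores the head/tail masking and the two-lane pipelining above, assumes an 8-byte-aligned buffer whose length is a multiple of 8, and the function name is invented.

#include <stddef.h>
#include <stdint.h>

static unsigned int do_csum_model(const uint64_t *buf, size_t len)
{
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i < len / 8; i++) {
		sum += buf[i];
		if (sum < buf[i])	/* carry out of bit 63 wraps  */
			sum++;		/* around (one's complement)  */
	}
	/* fold 64 -> 32 -> 16 bits, as the .do_csum_exit code does */
	sum = (sum & 0xffffffffUL) + (sum >> 32);
	sum = (sum & 0xffffUL) + (sum >> 16);
	sum = (sum & 0xffffUL) + (sum >> 16);
	sum = (sum & 0xffffUL) + (sum >> 16);
	return (unsigned int)sum;
}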
- mov result2[0]=r0 - ;; - .align 32 -1: -(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] -(pC1[1])adds carry1=1,carry1 -(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] -(pC2[1])adds carry2=1,carry2 -(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] -(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] -2: -(p[0]) ld8 word1[0]=[first1],16 -(p[0]) ld8 word2[0]=[first2],16 - br.ctop.sptk 1b - ;; - // Since len is a 32-bit value, carry cannot be larger than a 64-bit value. -(pC1[1])adds carry1=1,carry1 // since we miss the last one -(pC2[1])adds carry2=1,carry2 - ;; - add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 - add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 - ;; - cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 - cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 - ;; -(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] -(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] - ;; - add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] - ;; - cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] - ;; -(p6) adds result1[0]=1,result1[0] - ;; -.do_csum_exit: - // - // now fold 64 into 16 bits taking care of carry - // that's not very good because it has lots of sequentiality - // - mov tmp3=0xffff - zxt4 tmp1=result1[0] - shr.u tmp2=result1[0],32 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add result1[0]=tmp1,tmp2 - ;; - and tmp1=result1[0],tmp3 - shr.u tmp2=result1[0],16 - ;; - add ret0=tmp1,tmp2 - mov pr=saved_pr,0xffffffffffff0000 - ;; - // if buf was odd then swap bytes - mov ar.pfs=saved_pfs // restore ar.ec -(p15) mux1 ret0=ret0,@rev // reverse word - ;; - mov ar.lc=saved_lc -(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes - br.ret.sptk.many rp - -// I (Jun Nakajima) wrote an equivalent code (see below), but it was -// not much better than the original. So keep the original there so that -// someone else can challenge. -// -// shr.u word1[0]=result1[0],32 -// zxt4 result1[0]=result1[0] -// ;; -// add result1[0]=result1[0],word1[0] -// ;; -// zxt2 result2[0]=result1[0] -// extr.u word1[0]=result1[0],16,16 -// shr.u carry1=result1[0],32 -// ;; -// add result2[0]=result2[0],word1[0] -// ;; -// add result2[0]=result2[0],carry1 -// ;; -// extr.u ret0=result2[0],16,16 -// ;; -// add ret0=ret0,result2[0] -// ;; -// zxt2 ret0=ret0 -// mov ar.pfs=saved_pfs // restore ar.ec -// mov pr=saved_pr,0xffffffffffff0000 -// ;; -// // if buf was odd then swap bytes -// mov ar.lc=saved_lc -//(p15) mux1 ret0=ret0,@rev // reverse word -// ;; -//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes -// br.ret.sptk.many rp - -END(do_csum) diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S deleted file mode 100644 index 8573d59c9ed17098bfd1b38243b9c5a178fdcb8f..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/flush.S +++ /dev/null @@ -1,120 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Cache flushing routines. - * - * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co - * David Mosberger-Tang - * - * 05/28/05 Zoltan Menyhart Dynamic stride size - */ - -#include -#include - - - /* - * flush_icache_range(start,end) - * - * Make i-cache(s) coherent with d-caches. 
- * - * Must deal with range from start to end-1 but nothing else (need to - * be careful not to touch addresses that may be unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. - */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(flush_icache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_i_cache_stride_shift - mov r21=1 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=in1,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc.i" = - // "start" rounded down to stride boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop: fc.i r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(flush_icache_range) -EXPORT_SYMBOL_GPL(flush_icache_range) - - /* - * clflush_cache_range(start,size) - * - * Flush cache lines from start to start+size-1. - * - * Must deal with range from start to start+size-1 but nothing else - * (need to be careful not to touch addresses that may be - * unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. - */ - .section .kprobes.text,"ax" -GLOBAL_ENTRY(clflush_cache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_cache_stride_shift - mov r21=1 - add r22=in1,in0 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=r22,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc" = - // "start" rounded down to stride - // boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop_fc: - fc r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop_fc - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(clflush_cache_range) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S deleted file mode 100644 index def92b708e6e1f209ff6f26fb3158c20917509a2..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/idiv32.S +++ /dev/null @@ -1,86 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang - * - * 32-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". 
This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define EXTEND zxt4 -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define EXTEND sxt4 -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) - EXTEND in0 = in0 // in0 = a - EXTEND in1 = in1 // in1 = b - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - setf.exp f7 = r2 // f7 = 2^-34 - frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 - ;; -#ifdef MODULO - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 -(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 - ;; -#ifdef MODULO - setf.sig f7 = in0 -#endif -(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 - ;; - FP_TO_INT(f6, f6) // q = trunc(q2) - ;; -#ifdef MODULO - xma.l f6 = f6, f9, f7 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f6 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S deleted file mode 100644 index a8ba3bd3d4d8cc545eaab2c62584ba4234efe15f..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/idiv64.S +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 1999-2000 Hewlett-Packard Co - * Copyright (C) 1999-2000 David Mosberger-Tang - * - * 64-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) - */ - -#include -#include - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. 
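The refinement chain that follows (q0, e0, q1, e1, q2, y1, y2, r, q3) is easier to read in C. In the sketch below frcpa() is faked with an ordinary divide and double stands in for the 82-bit register format, so it illustrates only the structure of the Newton-Raphson iteration; the real sequence relies on the extended precision for trunc(q3) to be the exact quotient.

#include <stdint.h>

static double frcpa(double b)	/* stand-in for the hardware's     */
{				/* ~8-bit reciprocal approximation */
	return 1.0 / b;
}

static int64_t div_model(int64_t a_int, int64_t b_int)
{
	double a = (double)a_int, b = (double)b_int;
	double y0 = frcpa(b);
	double q0 = a * y0;		/* q0 = a*y0	    */
	double e0 = 1.0 - b * y0;	/* e0 = -b*y0 + 1   */
	double q1 = q0 + q0 * e0;	/* q1 = q0*e0 + q0  */
	double e1 = e0 * e0;		/* e1 = e0*e0	    */
	double q2 = q1 + q1 * e1;	/* q2 = q1*e1 + q1  */
	double y1 = y0 + y0 * e0;	/* y1 = y0*e0 + y0  */
	double y2 = y1 + y1 * e1;	/* y2 = y1*e1 + y1  */
	double r  = a - b * q2;		/* r  = -b*q2 + a   */
	double q3 = q2 + r * y2;	/* q3 = r*y2 + q2   */

	return (int64_t)q3;		/* trunc(q3)	    */
}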
- INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 - ;; -(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 -(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 - ;; -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif -(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 -(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 - ;; -(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 -(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a - ;; -#ifdef MODULO - setf.sig f8 = in0 // f8 = a - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 - ;; - FP_TO_INT(f11, f11) // q = trunc(q3) - ;; -#ifdef MODULO - xma.l f11 = f11, f9, f8 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f11 // transfer result to result register - br.ret.sptk.many rp -END(NAME) -EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S deleted file mode 100644 index dc9e6e6fe87695a48160d634890987dac93ebcca..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/ip_fast_csum.S +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Optmized version of the ip_fast_csum() function - * Used for calculating IP header checksum - * - * Return: 16bit checksum, complemented - * - * Inputs: - * in0: address of buffer to checksum (char *) - * in1: length of the buffer (int) - * - * Copyright (C) 2002, 2006 Intel Corp. - * Copyright (C) 2002, 2006 Ken Chen - */ - -#include -#include - -/* - * Since we know that most likely this function is called with buf aligned - * on 4-byte boundary and 20 bytes in length, we can execution rather quickly - * versus calling generic version of do_csum, which has lots of overhead in - * handling various alignments and sizes. However, due to lack of constrains - * put on the function input argument, cases with alignment not on 4-byte or - * size not equal to 20 bytes will be handled by the generic do_csum function. - */ - -#define in0 r32 -#define in1 r33 -#define in2 r34 -#define in3 r35 -#define in4 r36 -#define ret0 r8 - -GLOBAL_ENTRY(ip_fast_csum) - .prologue - .body - cmp.ne p6,p7=5,in1 // size other than 20 byte? - and r14=3,in0 // is it aligned on 4-byte? 
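For reference, the 20-byte fast path that this prologue guards reduces to summing five 32-bit words and folding, roughly the following C (the function name is invented; the three fold steps mirror the three shr.u/zxt2 rounds below):

#include <stdint.h>

static uint16_t ip_fast_csum_model(const uint32_t *hdr)	/* ihl == 5 */
{
	uint64_t sum = 0;
	int i;

	for (i = 0; i < 5; i++)
		sum += hdr[i];
	/* sum < 2^35, so three folds are enough to bring it to 16 bits */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;	/* complemented, like andcm ret0=r9,r20 */
}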
- add r15=4,in0 // second source pointer - ;; - cmp.ne.or.andcm p6,p7=r14,r0 - ;; -(p7) ld4 r20=[in0],8 -(p7) ld4 r21=[r15],8 -(p6) br.spnt .generic - ;; - ld4 r22=[in0],8 - ld4 r23=[r15],8 - ;; - ld4 r24=[in0] - add r20=r20,r21 - add r22=r22,r23 - ;; - add r20=r20,r22 - ;; - add r20=r20,r24 - ;; - shr.u ret0=r20,16 // now need to add the carry - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 // add carry again - zxt2 r20=r20 - ;; - add r20=ret0,r20 - ;; - shr.u ret0=r20,16 - zxt2 r20=r20 - ;; - add r20=ret0,r20 - mov r9=0xffff - ;; - andcm ret0=r9,r20 - .restore sp // reset frame state - br.ret.sptk.many b0 - ;; - -.generic: - .prologue - .save ar.pfs, r35 - alloc r35=ar.pfs,2,2,2,0 - .save rp, r34 - mov r34=b0 - .body - dep.z out1=in1,2,30 - mov out0=in0 - ;; - br.call.sptk.many b0=do_csum - ;; - andcm ret0=-1,ret0 - mov ar.pfs=r35 - mov b0=r34 - br.ret.sptk.many b0 -END(ip_fast_csum) -EXPORT_SYMBOL(ip_fast_csum) - -GLOBAL_ENTRY(csum_ipv6_magic) - ld4 r20=[in0],4 - ld4 r21=[in1],4 - zxt4 in2=in2 - ;; - ld4 r22=[in0],4 - ld4 r23=[in1],4 - dep r15=in3,in2,32,16 - ;; - ld4 r24=[in0],4 - ld4 r25=[in1],4 - mux1 r15=r15,@rev - add r16=r20,r21 - add r17=r22,r23 - zxt4 in4=in4 - ;; - ld4 r26=[in0],4 - ld4 r27=[in1],4 - shr.u r15=r15,16 - add r18=r24,r25 - add r8=r16,r17 - ;; - add r19=r26,r27 - add r8=r8,r18 - ;; - add r8=r8,r19 - add r15=r15,in4 - ;; - add r8=r8,r15 - ;; - shr.u r10=r8,32 // now fold sum into short - zxt4 r11=r8 - ;; - add r8=r10,r11 - ;; - shr.u r10=r8,16 // yeah, keep it rolling - zxt2 r11=r8 - ;; - add r8=r10,r11 - ;; - shr.u r10=r8,16 // three times lucky - zxt2 r11=r8 - ;; - add r8=r10,r11 - mov r9=0xffff - ;; - andcm r8=r9,r8 - br.ret.sptk.many b0 -END(csum_ipv6_magic) -EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S deleted file mode 100644 index 91a625fddbf0599e78b2419013be61f33ff727b4..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memcpy.S +++ /dev/null @@ -1,304 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard memcpy() function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * no return value - * - * Copyright (C) 2000-2001 Hewlett-Packard Co - * Stephane Eranian - * David Mosberger-Tang - */ -#include -#include - -GLOBAL_ENTRY(memcpy) - -# define MEM_LAT 21 /* latency to memory */ - -# define dst r2 -# define src r3 -# define retval r8 -# define saved_pfs r9 -# define saved_lc r10 -# define saved_pr r11 -# define cnt r16 -# define src2 r17 -# define t0 r18 -# define t1 r19 -# define t2 r20 -# define t3 r21 -# define t4 r22 -# define src_end r23 - -# define N (MEM_LAT + 4) -# define Nrot ((N + 7) & ~7) - - /* - * First, check if everything (src, dst, len) is a multiple of eight. If - * so, we handle everything with no taken branches (other than the loop - * itself) and a small icache footprint. Otherwise, we jump off to - * the more general copy routine handling arbitrary - * sizes/alignment etc. - */ - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot - .save ar.lc, saved_lc - mov saved_lc=ar.lc - or t0=in0,in1 - ;; - - or t0=t0,in2 - .save pr, saved_pr - mov saved_pr=pr - - .body - - cmp.eq p6,p0=in2,r0 // zero length? 
- mov retval=in0 // return dst -(p6) br.ret.spnt.many rp // zero length, return immediately - ;; - - mov dst=in0 // copy because of rotation - shr.u cnt=in2,3 // number of 8-byte words to copy - mov pr.rot=1<<16 - ;; - - adds cnt=-1,cnt // br.ctop is repeat/until - cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? - mov ar.ec=N - ;; - - and t0=0x7,t0 - mov ar.lc=cnt - ;; - cmp.ne p6,p0=t0,r0 - - mov src=in1 // copy because of rotation -(p7) br.cond.spnt.few .memcpy_short -(p6) br.cond.spnt.few .memcpy_long - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - .rotr val[N] - .rotp p[N] - .align 32 -1: { .mib -(p[0]) ld8 val[0]=[src],8 - nop.i 0 - brp.loop.imp 1b, 2f -} -2: { .mfb -(p[N-1])st8 [dst]=val[N-1],8 - nop.f 0 - br.ctop.dptk.few 1b -} - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time - * copy loop. This performs relatively poorly on Itanium, but it doesn't - * get used very often (gcc inlines small copies) and due to atomicity - * issues, we want to avoid read-modify-write of entire words. - */ - .align 32 -.memcpy_short: - adds cnt=-1,in2 // br.ctop is repeat/until - mov ar.ec=MEM_LAT - brp.loop.imp 1f, 2f - ;; - mov ar.lc=cnt - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; - nop.m 0 - ;; - /* - * It is faster to put a stop bit in the loop here because it makes - * the pipeline shorter (and latency is what matters on short copies). - */ - .align 32 -1: { .mib -(p[0]) ld1 val[0]=[src],1 - nop.i 0 - brp.loop.imp 1b, 2f -} ;; -2: { .mfb -(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 - nop.f 0 - br.ctop.dptk.few 1b -} ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov ar.pfs=saved_pfs - br.ret.sptk.many rp - - /* - * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't - * an overriding concern here, but throughput is. We first do - * sub-word copying until the destination is aligned, then we check - * if the source is also aligned. If so, we do a simple load/store-loop - * until there are less than 8 bytes left over and then we do the tail, - * by storing the last few bytes using sub-word copying. If the source - * is not aligned, we branch off to the non-congruent loop. - * - * stage: op: - * 0 ld - * : - * MEM_LAT+3 shrp - * MEM_LAT+4 st - * - * On Itanium, the pipeline itself runs without stalls. However, br.ctop - * seems to introduce an unavoidable bubble in the pipeline so the overall - * latency is 2 cycles/iteration. This gives us a _copy_ throughput - * of 4 byte/cycle. Still not bad. 
- */ -# undef N -# undef Nrot -# define N (MEM_LAT + 5) /* number of stages */ -# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ - -#define LOG_LOOP_SIZE 6 - -.memcpy_long: - alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame - and t0=-8,src // t0 = src & ~7 - and t2=7,src // t2 = src & 7 - ;; - ld8 t0=[t0] // t0 = 1st source word - adds src2=7,src // src2 = (src + 7) - sub t4=r0,dst // t4 = -dst - ;; - and src2=-8,src2 // src2 = (src + 7) & ~7 - shl t2=t2,3 // t2 = 8*(src & 7) - shl t4=t4,3 // t4 = 8*(dst & 7) - ;; - ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise - sub t3=64,t2 // t3 = 64-8*(src & 7) - shr.u t0=t0,t2 - ;; - add src_end=src,in2 - shl t1=t1,t3 - mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) - ;; - or t0=t0,t1 - mov cnt=r0 - adds src_end=-1,src_end - ;; -(p3) st1 [dst]=t0,1 -(p3) shr.u t0=t0,8 -(p3) adds cnt=1,cnt - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 -(p4) adds cnt=2,cnt - ;; -(p5) st4 [dst]=t0,4 -(p5) adds cnt=4,cnt - and src_end=-8,src_end // src_end = last word of source buffer - ;; - - // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: - -1:{ add src=cnt,src // make src point to remainder of source buffer - sub cnt=in2,cnt // cnt = number of bytes left to copy - mov t4=ip - } ;; - and src2=-8,src // align source pointer - adds t4=.memcpy_loops-1b,t4 - mov ar.ec=N - - and t0=7,src // t0 = src & 7 - shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy - shl cnt=cnt,3 // move bits 0-2 to 3-5 - ;; - - .rotr val[N+1], w[2] - .rotp p[N] - - cmp.ne p6,p0=t0,r0 // is src aligned, too? - shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) - adds t2=-1,t2 // br.ctop is repeat/until - ;; - add t4=t0,t4 - mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy - mov ar.lc=t2 - ;; - nop.m 0 - ;; - nop.m 0 - nop.i 0 - ;; - nop.m 0 - ;; -(p6) ld8 val[1]=[src2],8 // prime the pump... 
- mov b6=t4 - br.sptk.few b6 - ;; - -.memcpy_tail: - // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is - // less than 8) and t0 contains the last few bytes of the src buffer: -(p5) st4 [dst]=t0,4 -(p5) shr.u t0=t0,32 - mov ar.lc=saved_lc - ;; -(p4) st2 [dst]=t0,2 -(p4) shr.u t0=t0,16 - mov ar.pfs=saved_pfs - ;; -(p3) st1 [dst]=t0 - mov pr=saved_pr,-1 - br.ret.sptk.many rp - -/////////////////////////////////////////////////////// - .align 64 - -#define COPY(shift,index) \ - 1: { .mib \ - (p[0]) ld8 val[0]=[src2],8; \ - (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ - brp.loop.imp 1b, 2f \ - }; \ - 2: { .mfb \ - (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ - nop.f 0; \ - br.ctop.dptk.few 1b; \ - }; \ - ;; \ - ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ - ;; \ - shrp t0=val[N-1],val[N-index],shift; \ - br .memcpy_tail -.memcpy_loops: - COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ - COPY(8, 0) - COPY(16, 0) - COPY(24, 0) - COPY(32, 0) - COPY(40, 0) - COPY(48, 0) - COPY(56, 0) - -END(memcpy) -EXPORT_SYMBOL(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S deleted file mode 100644 index cc4e6ac914b6c2699749a700b7ff6e97a72ee02a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memcpy_mck.S +++ /dev/null @@ -1,659 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Itanium 2-optimized version of memcpy and copy_user function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * for memcpy: return dest - * for copy_user: return 0 if success, - * or number of byte NOT copied if error occurred. - * - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Ken Chen - */ -#include -#include -#include - -#define EK(y...) EX(y) - -/* McKinley specific optimization */ - -#define retval r8 -#define saved_pfs r31 -#define saved_lc r10 -#define saved_pr r11 -#define saved_in0 r14 -#define saved_in1 r15 -#define saved_in2 r16 - -#define src0 r2 -#define src1 r3 -#define dst0 r17 -#define dst1 r18 -#define cnt r9 - -/* r19-r30 are temp for each code section */ -#define PREFETCH_DIST 8 -#define src_pre_mem r19 -#define dst_pre_mem r20 -#define src_pre_l2 r21 -#define dst_pre_l2 r22 -#define t1 r23 -#define t2 r24 -#define t3 r25 -#define t4 r26 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define n8 r27 -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! 
-#define t13 r28 -#define t15 r29 -#define tmp r30 - -/* defines for long_copy block */ -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 1) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -/* alias */ -#define in0 r32 -#define in1 r33 -#define in2 r34 - -GLOBAL_ENTRY(memcpy) - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f0 - mov retval=in0 - br.cond.sptk .common_code - ;; -END(memcpy) -EXPORT_SYMBOL(memcpy) -GLOBAL_ENTRY(__copy_user) - .prologue -// check dest alignment - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f1 - mov saved_in0=in0 // save dest pointer - mov saved_in1=in1 // save src pointer - mov retval=r0 // initialize return value - ;; -.common_code: - cmp.gt p15,p0=8,in2 // check for small size - cmp.ne p13,p0=0,r28 // check dest alignment - cmp.ne p14,p0=0,r29 // check src alignment - add src0=0,in1 - sub r30=8,r28 // for .align_dest - mov saved_in2=in2 // save len - ;; - add dst0=0,in0 - add dst1=1,in0 // dest odd index - cmp.le p6,p0 = 1,r30 // for .align_dest -(p15) br.cond.dpnt .memcpy_short -(p13) br.cond.dpnt .align_dest -(p14) br.cond.dpnt .unaligned_src - ;; - -// both dest and src are aligned on 8-byte boundary -.aligned_src: - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - .save pr, saved_pr - mov saved_pr=pr - - shr.u cnt=in2,7 // this much cache line - ;; - cmp.lt p6,p0=2*PREFETCH_DIST,cnt - cmp.lt p7,p8=1,cnt - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .body - add cnt=-1,cnt - add src_pre_mem=0,in1 // prefetch src pointer - add dst_pre_mem=0,in0 // prefetch dest pointer - ;; -(p7) mov ar.lc=cnt // prefetch count -(p8) mov ar.lc=r0 -(p6) br.cond.dpnt .long_copy - ;; - -.prefetch: - lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few .prefetch - ;; - -.medium_copy: - and tmp=31,in2 // copy length after iteration - shr.u r29=in2,5 // number of 32-byte iteration - add dst1=8,dst0 // 2nd dest pointer - ;; - add cnt=-1,r29 // ctop iteration adjustment - cmp.eq p10,p0=r29,r0 // do we really need to loop? 
- add src1=8,src0 // 2nd src pointer - cmp.le p6,p0=8,tmp - ;; - cmp.le p7,p0=16,tmp - mov ar.lc=cnt // loop setup - cmp.eq p16,p17 = r0,r0 - mov ar.ec=2 -(p10) br.dpnt.few .aligned_src_tail - ;; - TEXT_ALIGN(32) -1: -EX(.ex_handler, (p16) ld8 r34=[src0],16) -EK(.ex_handler, (p16) ld8 r38=[src1],16) -EX(.ex_handler, (p17) st8 [dst0]=r33,16) -EK(.ex_handler, (p17) st8 [dst1]=r37,16) - ;; -EX(.ex_handler, (p16) ld8 r32=[src0],16) -EK(.ex_handler, (p16) ld8 r36=[src1],16) -EX(.ex_handler, (p16) st8 [dst0]=r34,16) -EK(.ex_handler, (p16) st8 [dst1]=r38,16) - br.ctop.dptk.few 1b - ;; - -.aligned_src_tail: -EX(.ex_handler, (p6) ld8 t1=[src0]) - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs -EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) - cmp.le p8,p0=24,tmp - and r21=-8,tmp - ;; -EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) -EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 - and in2=7,tmp // remaining length -EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 - add src0=src0,r21 // setting up src pointer - add dst0=dst0,r21 // setting up dest pointer - ;; -EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 - mov pr=saved_pr,-1 - br.dptk.many .memcpy_short - ;; - -/* code taken from copy_page_mck */ -.long_copy: - .rotr v[2*PREFETCH_DIST] - .rotp p[N] - - mov src_pre_mem = src0 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = dst0 - - add src_pre_l2 = 8*8, src0 - add dst_pre_l2 = 8*8, dst0 - ;; - add src0 = 8, src_pre_mem // first t1 src - mov ar.lc = 2*PREFETCH_DIST - 1 - shr.u cnt=in2,7 // number of lines - add src1 = 3*8, src_pre_mem // first t3 src - add dst0 = 8, dst_pre_mem // first t1 dst - add dst1 = 3*8, dst_pre_mem // first t3 dst - ;; - and tmp=127,in2 // remaining bytes after this block - add cnt = -(2*PREFETCH_DIST) - 1, cnt - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 -EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 - mov ar.lc = cnt - mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 -EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 -EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory -EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 - ;; -EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory -EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 -EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 -EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 - ;; -EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) - ;; -EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) -EK(.ex_handler, (p[D]) 
ld8 t15 = [src1], 4*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) - ;; -EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) -EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) - br.ctop.sptk .line_copy - ;; - - add dst0=-8,dst0 - add src0=-8,src0 - mov in2=tmp - .restore sp - br.sptk.many .medium_copy - ;; - -#define BLOCK_SIZE 128*32 -#define blocksize r23 -#define curlen r24 - -// dest is on 8-byte boundary, src is not. We need to do -// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. -.unaligned_src: - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,5,0,8 - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .save pr, saved_pr - mov saved_pr=pr - .body -.4k_block: - mov saved_in0=dst0 // need to save all input arguments - mov saved_in2=in2 - mov blocksize=BLOCK_SIZE - ;; - cmp.lt p6,p7=blocksize,in2 - mov saved_in1=src0 - ;; -(p6) mov in2=blocksize - ;; - shr.u r21=in2,7 // this much cache line - shr.u r22=in2,4 // number of 16-byte iteration - and curlen=15,in2 // copy length after iteration - and r30=7,src0 // source alignment - ;; - cmp.lt p7,p8=1,r21 - add cnt=-1,r21 - ;; - - add src_pre_mem=0,src0 // prefetch src pointer - add dst_pre_mem=0,dst0 // prefetch dest pointer - and src0=-8,src0 // 1st src pointer -(p7) mov ar.lc = cnt -(p8) mov ar.lc = r0 - ;; - TEXT_ALIGN(32) -1: lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few 1b - ;; - - shladd dst1=r22,3,dst0 // 2nd dest pointer - shladd src1=r22,3,src0 // 2nd src pointer - cmp.eq p8,p9=r22,r0 // do we really need to loop? - cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? - add cnt=-1,r22 // ctop iteration adjustment - ;; -EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer -EK(.ex_handler, (p9) ld8 r37=[src1],8) -(p8) br.dpnt.few .noloop - ;; - -// The jump address is calculated based on src alignment. The COPYU -// macro below need to confine its size to power of two, so an entry -// can be caulated using shl instead of an expensive multiply. The -// size is then hard coded by the following #define to match the -// actual size. This make it somewhat tedious when COPYU macro gets -// changed and this need to be adjusted to match. -#define LOOP_SIZE 6 -1: - mov r29=ip // jmp_table thread - mov ar.lc=cnt - ;; - add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 - shl r28=r30, LOOP_SIZE // jmp_table thread - mov ar.ec=2 // loop setup - ;; - add r29=r29,r28 // jmp_table thread - cmp.eq p16,p17=r0,r0 - ;; - mov b6=r29 // jmp_table thread - ;; - br.cond.sptk.few b6 - -// for 8-15 byte case -// We will skip the loop, but need to replicate the side effect -// that the loop produces. -.noloop: -EX(.ex_handler, (p6) ld8 r37=[src1],8) - add src0=8,src0 -(p6) shl r25=r30,3 - ;; -EX(.ex_handler, (p6) ld8 r27=[src1]) -(p6) shr.u r28=r37,r25 -(p6) sub r26=64,r25 - ;; -(p6) shl r27=r27,r26 - ;; -(p6) or r21=r28,r27 - -.unaligned_src_tail: -/* check if we have more than blocksize to copy, if so go back */ - cmp.gt p8,p0=saved_in2,blocksize - ;; -(p8) add dst0=saved_in0,blocksize -(p8) add src0=saved_in1,blocksize -(p8) sub in2=saved_in2,blocksize -(p8) br.dpnt .4k_block - ;; - -/* we have up to 15 byte to copy in the tail. - * part of work is already done in the jump table code - * we are at the following state. 
- * src side: - * - * xxxxxx xx <----- r21 has xxxxxxxx already - * -------- -------- -------- - * 0 8 16 - * ^ - * | - * src1 - * - * dst - * -------- -------- -------- - * ^ - * | - * dst1 - */ -EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy -(p6) add curlen=-8,curlen // update length - mov ar.pfs=saved_pfs - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov in2=curlen // remaining length - mov dst0=dst1 // dest pointer - add src0=src1,r30 // forward by src alignment - ;; - -// 7 byte or smaller. -.memcpy_short: - cmp.le p8,p9 = 1,in2 - cmp.le p10,p11 = 2,in2 - cmp.le p12,p13 = 3,in2 - cmp.le p14,p15 = 4,in2 - add src1=1,src0 // second src pointer - add dst1=1,dst0 // second dest pointer - ;; - -EX(.ex_handler_short, (p8) ld1 t1=[src0],2) -EK(.ex_handler_short, (p10) ld1 t2=[src1],2) -(p9) br.ret.dpnt rp // 0 byte copy - ;; - -EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) -EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) -(p11) br.ret.dpnt rp // 1 byte copy - -EX(.ex_handler_short, (p12) ld1 t3=[src0],2) -EK(.ex_handler_short, (p14) ld1 t4=[src1],2) -(p13) br.ret.dpnt rp // 2 byte copy - ;; - - cmp.le p6,p7 = 5,in2 - cmp.le p8,p9 = 6,in2 - cmp.le p10,p11 = 7,in2 - -EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) -EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) -(p15) br.ret.dpnt rp // 3 byte copy - ;; - -EX(.ex_handler_short, (p6) ld1 t5=[src0],2) -EK(.ex_handler_short, (p8) ld1 t6=[src1],2) -(p7) br.ret.dpnt rp // 4 byte copy - ;; - -EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) -EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) -(p9) br.ret.dptk rp // 5 byte copy - -EX(.ex_handler_short, (p10) ld1 t7=[src0],2) -(p11) br.ret.dptk rp // 6 byte copy - ;; - -EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) - br.ret.dptk rp // done all cases - - -/* Align dest to nearest 8-byte boundary. We know we have at - * least 7 bytes to copy, enough to crawl to 8-byte boundary. - * Actual number of byte to crawl depend on the dest alignment. 
- * 7 byte or less is taken care at .memcpy_short - - * src0 - source even index - * src1 - source odd index - * dst0 - dest even index - * dst1 - dest odd index - * r30 - distance to 8-byte boundary - */ - -.align_dest: - add src1=1,in1 // source odd index - cmp.le p7,p0 = 2,r30 // for .align_dest - cmp.le p8,p0 = 3,r30 // for .align_dest -EX(.ex_handler_short, (p6) ld1 t1=[src0],2) - cmp.le p9,p0 = 4,r30 // for .align_dest - cmp.le p10,p0 = 5,r30 - ;; -EX(.ex_handler_short, (p7) ld1 t2=[src1],2) -EK(.ex_handler_short, (p8) ld1 t3=[src0],2) - cmp.le p11,p0 = 6,r30 -EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) - cmp.le p12,p0 = 7,r30 - ;; -EX(.ex_handler_short, (p9) ld1 t4=[src1],2) -EK(.ex_handler_short, (p10) ld1 t5=[src0],2) -EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) -EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) - ;; -EX(.ex_handler_short, (p11) ld1 t6=[src1],2) -EK(.ex_handler_short, (p12) ld1 t7=[src0],2) - cmp.eq p6,p7=r28,r29 -EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) -EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) - sub in2=in2,r30 - ;; -EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) -EK(.ex_handler_short, (p12) st1 [dst0] = t7) - add dst0=in0,r30 // setup arguments - add src0=in1,r30 -(p6) br.cond.dptk .aligned_src -(p7) br.cond.dpnt .unaligned_src - ;; - -/* main loop body in jump table format */ -#define COPYU(shift) \ -1: \ -EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ -EK(.ex_handler, (p16) ld8 r36=[src1],8); \ - (p17) shrp r35=r33,r34,shift;; /* 1 */ \ -EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ - nop.m 0; \ - (p16) shrp r38=r36,r37,shift; \ -EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ -EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ - br.ctop.dptk.few 1b;; \ - (p7) add src1=-8,src1; /* back out for <8 byte case */ \ - shrp r21=r22,r38,shift; /* speculative work */ \ - br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ - ;; - TEXT_ALIGN(32) -.jump_table: - COPYU(8) // unaligned cases -.jmp1: - COPYU(16) - COPYU(24) - COPYU(32) - COPYU(40) - COPYU(48) - COPYU(56) - -#undef A -#undef B -#undef C -#undef D - -/* - * Due to lack of local tag support in gcc 2.x assembler, it is not clear which - * instruction failed in the bundle. The exception algorithm is that we - * first figure out the faulting address, then detect if there is any - * progress made on the copy, if so, redo the copy from last known copied - * location up to the faulting address (exclusive). In the copy_from_user - * case, remaining byte in kernel buffer will be zeroed. - * - * Take copy_from_user as an example, in the code there are multiple loads - * in a bundle and those multiple loads could span over two pages, the - * faulting address is calculated as page_round_down(max(src0, src1)). - * This is based on knowledge that if we can access one byte in a page, we - * can access any byte in that page. 
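Editor's note: the COPYU macro above is the heart of the unaligned path: shrp extracts 64 bits from the 128-bit concatenation of two adjacent aligned words, shifted by the source misalignment. A hedged C analogue for a little-endian machine follows; it assumes mis is 1..7 (the aligned case is dispatched elsewhere, as in the asm), and like the asm it reads from the aligned word containing the first source byte:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the shrp merge: read aligned 8-byte words and combine
     * each adjacent pair with shifts.  Requires mis != 0; reads may
     * start up to 7 bytes before src, mirroring the asm's aligned
     * priming load.  Illustrative only. */
    static void copy_merge(uint64_t *dst, const uint8_t *src, size_t words)
    {
        unsigned mis = (uintptr_t)src & 7;               /* 1..7 assumed */
        const uint64_t *s = (const uint64_t *)(src - mis); /* aligned base */
        unsigned sh = 8 * mis;
        uint64_t lo = *s++;
        for (size_t i = 0; i < words; i++) {
            uint64_t hi = *s++;
            dst[i] = (lo >> sh) | (hi << (64 - sh));     /* shrp-style merge */
            lo = hi;
        }
    }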
- * - * predicate used in the exception handler: - * p6-p7: direction - * p10-p11: src faulting addr calculation - * p12-p13: dst faulting addr calculation - */ - -#define A r19 -#define B r20 -#define C r21 -#define D r22 -#define F r28 - -#define saved_retval loc0 -#define saved_rtlink loc1 -#define saved_pfs_stack loc2 - -.ex_hndlr_s: - add src0=8,src0 - br.sptk .ex_handler - ;; -.ex_hndlr_d: - add dst0=8,dst0 - br.sptk .ex_handler - ;; -.ex_hndlr_lcpy_1: - mov src1=src_pre_mem - mov dst1=dst_pre_mem - cmp.gtu p10,p11=src_pre_mem,saved_in1 - cmp.gtu p12,p13=dst_pre_mem,saved_in0 - ;; -(p10) add src0=8,saved_in1 -(p11) mov src0=saved_in1 -(p12) add dst0=8,saved_in0 -(p13) mov dst0=saved_in0 - br.sptk .ex_handler -.ex_handler_lcpy: - // in line_copy block, the preload addresses should always ahead - // of the other two src/dst pointers. Furthermore, src1/dst1 should - // always ahead of src0/dst0. - mov src1=src_pre_mem - mov dst1=dst_pre_mem -.ex_handler: - mov pr=saved_pr,-1 // first restore pr, lc, and pfs - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - ;; -.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs - cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction - cmp.ltu p10,p11=src0,src1 - cmp.ltu p12,p13=dst0,dst1 - fcmp.eq p8,p0=f6,f0 // is it memcpy? - mov tmp = dst0 - ;; -(p11) mov src1 = src0 // pick the larger of the two -(p13) mov dst0 = dst1 // make dst0 the smaller one -(p13) mov dst1 = tmp // and dst1 the larger one - ;; -(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary -(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary - ;; -(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store -(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load - mov retval=saved_in2 -(p8) ld1 tmp=[src1] // force an oops for memcpy call -(p8) st1 [dst1]=r0 // force an oops for memcpy call -(p14) br.ret.sptk.many rp - -/* - * The remaining byte to copy is calculated as: - * - * A = (faulting_addr - orig_src) -> len to faulting ld address - * or - * (faulting_addr - orig_dst) -> len to faulting st address - * B = (cur_dst - orig_dst) -> len copied so far - * C = A - B -> len need to be copied - * D = orig_len - A -> len need to be left along - */ -(p6) sub A = F, saved_in0 -(p7) sub A = F, saved_in1 - clrrrb - ;; - alloc saved_pfs_stack=ar.pfs,3,3,3,0 - cmp.lt p8,p0=A,r0 - sub B = dst0, saved_in0 // how many byte copied so far - ;; -(p8) mov A = 0; // A shouldn't be negative, cap it - ;; - sub C = A, B - sub D = saved_in2, A - ;; - cmp.gt p8,p0=C,r0 // more than 1 byte? - mov r8=0 - mov saved_retval = D - mov saved_rtlink = b0 - - add out0=saved_in0, B - add out1=saved_in1, B - mov out2=C -(p8) br.call.sptk.few b0=__copy_user // recursive call - ;; - - add saved_retval=saved_retval,r8 // above might return non-zero value - ;; - - mov retval=saved_retval - mov ar.pfs=saved_pfs_stack - mov b0=saved_rtlink - br.ret.sptk.many rp - -/* end of McKinley specific optimization */ -END(__copy_user) -EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S deleted file mode 100644 index 07a8b92c64965e2578e3607532325c170ae220c7..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/memset.S +++ /dev/null @@ -1,365 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Optimized version of the standard memset() function. 
- - Copyright (c) 2002 Hewlett-Packard Co/CERN - Sverre Jarp - - Return: dest - - Inputs: - in0: dest - in1: value - in2: count - - The algorithm is fairly straightforward: set byte by byte until we - we get to a 16B-aligned address, then loop on 128 B chunks using an - early store as prefetching, then loop on 32B chucks, then clear remaining - words, finally clear remaining bytes. - Since a stf.spill f0 can store 16B in one go, we use this instruction - to get peak speed when value = 0. */ - -#include -#include -#undef ret - -#define dest in0 -#define value in1 -#define cnt in2 - -#define tmp r31 -#define save_lc r30 -#define ptr0 r29 -#define ptr1 r28 -#define ptr2 r27 -#define ptr3 r26 -#define ptr9 r24 -#define loopcnt r23 -#define linecnt r22 -#define bytecnt r21 - -#define fvalue f6 - -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches -#define p_nz p7 -#define p_zr p8 -#define p_unalgn p9 -#define p_y p11 -#define p_n p12 -#define p_yy p13 -#define p_nn p14 - -#define MIN1 15 -#define MIN1P1HALF 8 -#define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount -#define PREF_AHEAD 8 - -GLOBAL_ENTRY(memset) -{ .mmi - .prologue - alloc tmp = ar.pfs, 3, 0, 0, 0 - lfetch.nt1 [dest] // - .save ar.lc, save_lc - mov.i save_lc = ar.lc - .body -} { .mmi - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero - cmp.eq p_scr, p0 = cnt, r0 -;; } -{ .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for correct alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) -} { .mib - mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 -;; } -{ .mib - cmp.ne p_unalgn, p0 = tmp, r0 // -} { .mib - sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) -;; } -{ .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? -;; } -{ .mib -(p_y) add cnt = -8, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? -} { .mib -(p_y) st8 [ptr2] = value,-4 // -(p_n) add ptr2 = 4, ptr2 // -;; } -{ .mib -(p_yy) add cnt = -4, cnt // -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? -} { .mib -(p_yy) st4 [ptr2] = value,-2 // -(p_nn) add ptr2 = 2, ptr2 // -;; } -{ .mmi - mov tmp = LINE_SIZE+1 // for compare -(p_y) add cnt = -2, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? -} { .mmi - setf.sig fvalue=value // transfer value to FLP side -(p_y) st2 [ptr2] = value,-1 // -(p_n) add ptr2 = 1, ptr2 // -;; } - -{ .mmi -(p_yy) st1 [ptr2] = value // - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? 
-} { .mbb -(p_yy) add cnt = -1, cnt // -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few -;; } - -{ .mib - nop.m 0 - shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill -;; } - - TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt // - add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt // -;; } -.pref_l1a: -{ .mib - stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1a -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp // -;; } -.l1ax: - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf8 [ptr2] = fvalue, 24 -(p_scr) stf8 [ptr9] = fvalue, 128 - br.cloop.dptk.few .l1ax -;; } -{ .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 -;; } - - TEXT_ALIGN(32) -.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt -;; } -.pref_l1b: -{ .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1b -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp -;; } -.l1bx: - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf.spill [ptr2] = f0, 32 -(p_scr) stf.spill [ptr9] = f0, 128 - br.cloop.dptk.few .l1bx -;; } -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // -;; } - -.fraction_of_line: -{ .mib - add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 -;; } -{ .mib - cmp.eq p_scr, p0 = loopcnt, r0 - add loopcnt = -1, loopcnt -(p_scr) br.cond.dpnt.many .store_words -;; } -{ .mib - and cnt = 0x1f, cnt // compute the remaining cnt - mov.i ar.lc = loopcnt -;; } - TEXT_ALIGN(32) -.l2: // ------------------------------------ // L2A: store 32B in 2 cycles -{ .mmb - stf8 [ptr1] = fvalue, 8 - stf8 [ptr2] = fvalue, 8 -;; } { .mmb - stf8 [ptr1] = fvalue, 24 - stf8 [ptr2] = fvalue, 24 - br.cloop.dptk.many .l2 -;; } -.store_words: -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch -;; } - -{ .mmi - stf8 [ptr1] = fvalue, 8 // store - cmp.le p_y, p_n = 16, cnt - add cnt = -8, cnt // subtract -;; } -{ .mmi -(p_y) stf8 [ptr1] = fvalue, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract -;; } -{ .mmi // store -(p_yy) stf8 [ptr1] = fvalue, 8 -(p_yy) add cnt = -8, cnt // subtract -;; } - -.move_bytes_from_alignment: -{ .mib - cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? -(p_scr) br.cond.dpnt.few .restore_and_exit -;; } -{ .mib -(p_y) st4 [ptr1] = value,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? -;; } -{ .mib -(p_yy) st2 [ptr1] = value,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? -;; } - -{ .mib -(p_y) st1 [ptr1] = value -;; } -.restore_and_exit: -{ .mib - nop.m 0 - mov.i ar.lc = save_lc - br.ret.sptk.many rp -;; } - -.move_bytes_unaligned: -{ .mmi - .pred.rel "mutex",p_y, p_n - .pred.rel "mutex",p_yy, p_nn -(p_n) cmp.le p_yy, p_nn = 4, cnt -(p_y) cmp.le p_yy, p_nn = 5, cnt -(p_n) add ptr2 = 2, ptr1 -} { .mmi -(p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] -(p_y) add cnt = -1, cnt -;; } -{ .mmi -(p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store - mov.i ar.lc = save_lc -} { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] -(p_yy) add cnt = -4, cnt -;; } -{ .mmi -(p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? -} { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] -(p_y) add cnt = -4, cnt -;; } -{ .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
-} { .mmi -(p_yy) add cnt = -4, cnt -;; } -{ .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) - br.ret.sptk.many rp -} -END(memset) -EXPORT_SYMBOL(memset) diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S deleted file mode 100644 index d66de596697441c8cab0c73589a017ca0fc6c466..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strlen.S +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * - * Optimized version of the standard strlen() function - * - * - * Inputs: - * in0 address of string - * - * Outputs: - * ret0 the number of characters in the string (0 if empty string) - * does not count the \0 - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian - * - * 09/24/99 S.Eranian add speculation recovery code - */ - -#include -#include - -// -// -// This is an enhanced version of the basic strlen. it includes a combination -// of compute zero index (czx), parallel comparisons, speculative loads and -// loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we generate a -// kernel panic. Otherwise we return the strlen as usual. -// -// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. 
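Editor's note: czx1.r has no portable equivalent, but the chunked scan and the 0xff head mask described in the comments above map onto the classic "has-zero-byte" bit trick. A self-contained little-endian C sketch, without the speculation/recovery machinery of the IA-64 routine (__builtin_ctzll is a GCC/Clang builtin):

    #include <stddef.h>
    #include <stdint.h>

    #define ONES  0x0101010101010101ULL
    #define HIGHS 0x8080808080808080ULL

    /* Word-at-a-time strlen: stand-in for czx1.r plus the 0xff mask
     * that hides the bytes before an unaligned string start. */
    static size_t strlen_words(const char *s)
    {
        const uint64_t *w = (const uint64_t *)((uintptr_t)s & ~7UL);
        unsigned head = (uintptr_t)s & 7;
        /* OR 0xff over the bytes before the string start so they can
         * never look like a NUL (the asm ORs `mask` into v[1]). */
        uint64_t v = *w++ | (head ? (~0ULL >> (8 * (8 - head))) : 0);

        for (;;) {
            uint64_t zero = (v - ONES) & ~v & HIGHS;  /* any NUL byte? */
            if (zero) {
                unsigned byte = __builtin_ctzll(zero) / 8; /* first NUL */
                return (const char *)(w - 1) + byte - s;
            }
            v = *w++;
        }
    }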
-// -// -// -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(strlen) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8 v[1]=[src],8 // must not speculate: can fail here - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // speculatively load next - cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? - tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // the other test got us out of the loop -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from base - sub tmp=8,val1 // which byte in word - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // adjust - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // IMPORTANT: - // Please note that in the case of strlen() as opposed to strlen_user() - // we don't use the exception mechanism, as this function is not - // supposed to fail. If that happens it means we have a bug and the - // code will cause of kernel fault. - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. 
- // -.recover: - ld8 val=[base],8 // will fail if unrecoverable fault - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: -(p6) ld8 val=[base],8 // will fail if unrecoverable fault - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? -(p6) br.wtop.dptk 2b // loop until p6 == 0 - ;; // (avoid WAW on p63) - sub ret0=base,orig // distance from base - sub tmp=8,val1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code -END(strlen) -EXPORT_SYMBOL(strlen) diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S deleted file mode 100644 index 49eb81b69cd224f77addd7853d87079eac6c7994..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strncpy_from_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Just like strncpy() except that if a fault occurs during copying, - * -EFAULT is returned. - * - * Inputs: - * in0: address of destination buffer - * in1: address of string to be copied - * in2: length of buffer in bytes - * Outputs: - * r8: -EFAULT in case of fault or number of bytes copied if no fault - * - * Copyright (C) 1998-2001 Hewlett-Packard Co - * Copyright (C) 1998-2001 David Mosberger-Tang - * - * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by - * by Andreas Schwab ). - */ - -#include -#include - -GLOBAL_ENTRY(__strncpy_from_user) - alloc r2=ar.pfs,3,0,0,0 - mov r8=0 - mov r9=in1 - ;; - add r10=in1,in2 - cmp.eq p6,p0=r0,in2 -(p6) br.ret.spnt.many rp - - // XXX braindead copy loop---this needs to be optimized -.Loop1: - EX(.Lexit, ld1 r8=[in1],1) - ;; - EX(.Lexit, st1 [in0]=r8,1) - cmp.ne p6,p7=r8,r0 - ;; -(p6) cmp.ne.unc p8,p0=in1,r10 -(p8) br.cond.dpnt.few .Loop1 - ;; -(p6) mov r8=in2 // buffer filled up---return buffer length -(p7) sub r8=in1,r9,1 // return string length (excluding NUL character) -[.Lexit:] - br.ret.sptk.many rp -END(__strncpy_from_user) -EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S deleted file mode 100644 index 4b684d4da10644db089c5b84e4a62e835883c06a..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/strnlen_user.S +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Returns 0 if exception before NUL or reaching the supplied limit (N), - * a value greater than N if the string is longer than the limit, else - * strlen. 
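Editor's note: the __strncpy_from_user loop deleted above has a three-way contract: -EFAULT on a fault, the buffer length if it fills up before a NUL, else the string length excluding the NUL. A C sketch of that contract; load_user_byte is a hypothetical stand-in for the EX()-protected ld1 (the real code uses exception-table fixups, not a return code):

    #include <stddef.h>

    extern int load_user_byte(const char *uaddr, char *out); /* 0 ok, -1 fault */

    static long strncpy_from_user_sketch(char *dst, const char *src, size_t count)
    {
        for (size_t i = 0; i < count; i++) {
            char c;
            if (load_user_byte(src + i, &c))
                return -14;            /* -EFAULT */
            dst[i] = c;
            if (c == '\0')
                return i;              /* length excluding the NUL */
        }
        return count;                  /* buffer filled, no NUL seen */
    }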
- * - * Inputs: - * in0: address of buffer - * in1: string length limit N - * Outputs: - * r8: 0 in case of fault, strlen(buffer)+1 otherwise - * - * Copyright (C) 1999, 2001 David Mosberger-Tang - */ - -#include -#include - -GLOBAL_ENTRY(__strnlen_user) - .prologue - alloc r2=ar.pfs,2,0,0,0 - .save ar.lc, r16 - mov r16=ar.lc // preserve ar.lc - - .body - - add r3=-1,in1 - ;; - mov ar.lc=r3 - mov r9=0 - ;; - // XXX braindead strlen loop---this needs to be optimized -.Loop1: - EXCLR(.Lexit, ld1 r8=[in0],1) - add r9=1,r9 - ;; - cmp.eq p6,p0=r8,r0 -(p6) br.cond.dpnt .Lexit - br.cloop.dptk.few .Loop1 - - add r9=1,in1 // NUL not found---return N+1 - ;; -.Lexit: - mov r8=r9 - mov ar.lc=r16 // restore ar.lc - br.ret.sptk.many rp -END(__strnlen_user) -EXPORT_SYMBOL(__strnlen_user) diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S deleted file mode 100644 index 5413dafe6b2e0103e418c7fbd8dd7b08c51e9e72..0000000000000000000000000000000000000000 --- a/arch/ia64/lib/xor.S +++ /dev/null @@ -1,181 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * arch/ia64/lib/xor.S - * - * Optimized RAID-5 checksumming functions for IA-64. - */ - -#include -#include - -GLOBAL_ENTRY(xor_ia64_2) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 3, 0, 13, 16 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[6+1])st8.nta [r8] = d[1], 8 - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_2) -EXPORT_SYMBOL(xor_ia64_2) - -GLOBAL_ENTRY(xor_ia64_3) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 4, 0, 20, 24 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] - ;; -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], s3[6] - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_3) -EXPORT_SYMBOL(xor_ia64_3) - -GLOBAL_ENTRY(xor_ia64_4) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 5, 0, 27, 32 - .save ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r20 = s3[6], s4[6] - ;; -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r20 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_4) -EXPORT_SYMBOL(xor_ia64_4) - -GLOBAL_ENTRY(xor_ia64_5) - .prologue - .fframe 0 - .save ar.pfs, r31 - alloc r31 = ar.pfs, 6, 0, 34, 40 - .save 
ar.lc, r30 - mov r30 = ar.lc - .save pr, r29 - mov r29 = pr - ;; - .body - mov r8 = in1 - mov ar.ec = 6 + 2 - shr in0 = in0, 3 - ;; - adds in0 = -1, in0 - mov r16 = in1 - mov r17 = in2 - ;; - mov r18 = in3 - mov ar.lc = in0 - mov pr.rot = 1 << 16 - mov r19 = in4 - mov r20 = in5 - ;; - .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] - .rotp p[6+2] -0: -(p[0]) ld8.nta s1[0] = [r16], 8 -(p[0]) ld8.nta s2[0] = [r17], 8 -(p[6]) xor d[0] = s1[6], s2[6] -(p[0]) ld8.nta s3[0] = [r18], 8 -(p[0]) ld8.nta s4[0] = [r19], 8 -(p[6]) xor r21 = s3[6], s4[6] - ;; -(p[0]) ld8.nta s5[0] = [r20], 8 -(p[6+1])st8.nta [r8] = d[1], 8 -(p[6]) xor d[0] = d[0], r21 - ;; -(p[6]) xor d[0] = d[0], s5[6] - nop.f 0 - br.ctop.dptk.few 0b - ;; - mov ar.lc = r30 - mov pr = r29, -1 - br.ret.sptk.few rp -END(xor_ia64_5) -EXPORT_SYMBOL(xor_ia64_5) diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas old mode 100755 new mode 100644 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S deleted file mode 100644 index 010e1d227e5dbeb1fed42fbb67e2d824455f1b6c..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-gas-asm.S +++ /dev/null @@ -1,2 +0,0 @@ -[1:] nop 0 - .xdata4 ".data", 0, 1b-. diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S deleted file mode 100644 index 65d6378adaaaa5fbc791fe32608c3a285d432175..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-segrel.S +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .rodata - data4 @segrel(start) - .data -start: diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S deleted file mode 100644 index 0400c106806cd58d0f47ce02e49c88f9e0c977e4..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-serialize.S +++ /dev/null @@ -1,2 +0,0 @@ - .serialize.data - .serialize.instruction diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S deleted file mode 100644 index 107fa1c88c2e115f5bb497900f2619749e28b520..0000000000000000000000000000000000000000 --- a/arch/ia64/scripts/check-text-align.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .proc foo - .prologue -foo: .save rp, r2 - nop 0 - .align 64 - .endp foo diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags old mode 100755 new mode 100644 diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S deleted file mode 100644 index 259b3661b614168ff8ab377587c66b1478222218..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/entry.S +++ /dev/null @@ -1,244 +0,0 @@ -/* - * entry.S -- non-mmu 68000 interrupt and exception entry points - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file README.legal in the main directory of this archive - * for more details. 
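Editor's note on the xor_ia64_N routines deleted above: each computes the RAID-5 block XOR, accumulating into the first buffer, with rotating registers and br.ctop software-pipelining the loads ahead of the xor/store stages. In plain C the computation reduces to the following sketch (prototype shape is an assumption, modeled on the kernel's generic xor helpers; the pipelining is left to the compiler):

    #include <stddef.h>

    /* Sketch of xor_ia64_2/3: p1[i] ^= p2[i] (^ p3[i] ...) over
     * `bytes` bytes, one 8-byte word at a time (asm: shr in0 = in0, 3). */
    static void xor_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
    {
        size_t words = bytes / sizeof(unsigned long);
        for (size_t i = 0; i < words; i++)
            p1[i] ^= p2[i];
    }

    static void xor_3(unsigned long bytes, unsigned long *p1,
                      unsigned long *p2, unsigned long *p3)
    {
        size_t words = bytes / sizeof(unsigned long);
        for (size_t i = 0; i < words; i++)
            p1[i] ^= p2[i] ^ p3[i];
    }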
- * - * Linux/m68k support by Hamish Macdonald - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -.text - -.globl system_call -.globl resume -.globl ret_from_exception -.globl ret_from_signal -.globl sys_call_table -.globl bad_interrupt -.globl inthandler1 -.globl inthandler2 -.globl inthandler3 -.globl inthandler4 -.globl inthandler5 -.globl inthandler6 -.globl inthandler7 - -badsys: - movel #-ENOSYS,%sp@(PT_OFF_D0) - jra ret_from_exception - -do_trace: - movel #-ENOSYS,%sp@(PT_OFF_D0) /* needed for strace*/ - subql #4,%sp - SAVE_SWITCH_STACK - jbsr syscall_trace_enter - RESTORE_SWITCH_STACK - addql #4,%sp - movel %sp@(PT_OFF_ORIG_D0),%d1 - movel #-ENOSYS,%d0 - cmpl #NR_syscalls,%d1 - jcc 1f - lsl #2,%d1 - lea sys_call_table, %a0 - jbsr %a0@(%d1) - -1: movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - jbsr syscall_trace_leave - -ret_from_signal: - RESTORE_SWITCH_STACK - addql #4,%sp - jra ret_from_exception - -ENTRY(system_call) - SAVE_ALL_SYS - - /* save top of frame*/ - pea %sp@ - jbsr set_esp0 - addql #4,%sp - - movel %sp@(PT_OFF_ORIG_D0),%d0 - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 - btst #(TIF_SYSCALL_TRACE%8),%a2@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8) - jne do_trace - cmpl #NR_syscalls,%d0 - jcc badsys - lsl #2,%d0 - lea sys_call_table,%a0 - movel %a0@(%d0), %a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value*/ - -ret_from_exception: - btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel*/ - jeq Luser_return /* if so, skip resched, signals*/ - -Lkernel_return: - RESTORE_ALL - -Luser_return: - /* only allow interrupts when we are really the last one on the*/ - /* kernel stack, otherwise stack overflow can occur during*/ - /* heavy interrupt load*/ - andw #ALLOWINT,%sr - - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 - movel %d1,%a2 -1: - move %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - jne Lwork_to_do - RESTORE_ALL - -Lwork_to_do: - movel %a2@(TINFO_FLAGS),%d1 /* thread_info->flags */ - btst #TIF_NEED_RESCHED,%d1 - jne reschedule - -Lsignal_return: - subql #4,%sp /* dummy return address*/ - SAVE_SWITCH_STACK - pea %sp@(SWITCH_STACK_SIZE) - bsrw do_notify_resume - addql #4,%sp - RESTORE_SWITCH_STACK - addql #4,%sp - jra 1b - -/* - * This is the main interrupt handler, responsible for calling process_int() - */ -inthandler1: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #65,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler2: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #66,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler3: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #67,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler4: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #68,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler5: - SAVE_ALL_INT - movew 
%sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #69,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler6: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #70,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler7: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel #71,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -inthandler: - SAVE_ALL_INT - movew %sp@(PT_OFF_FORMATVEC), %d0 - and #0x3ff, %d0 - - movel %sp,%sp@- - movel %d0,%sp@- /* put vector # on stack*/ - jbsr process_int /* process the IRQ*/ -3: addql #8,%sp /* pop parameters off stack*/ - bra ret_from_exception - -/* - * Handler for uninitialized and spurious interrupts. - */ -ENTRY(bad_interrupt) - addql #1,irq_err_count - rte - -/* - * Beware - when entering resume, prev (the current task) is - * in a0, next (the new task) is in a1, so don't change these - * registers until their contents are no longer needed. - */ -ENTRY(resume) - movel %a0,%d1 /* save prev thread in d1 */ - movew %sr,%a0@(TASK_THREAD+THREAD_SR) /* save sr */ - SAVE_SWITCH_STACK - movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack */ - movel %usp,%a3 /* save usp */ - movel %a3,%a0@(TASK_THREAD+THREAD_USP) - - movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore user stack */ - movel %a3,%usp - movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new thread stack */ - RESTORE_SWITCH_STACK - movew %a1@(TASK_THREAD+THREAD_SR),%sr /* restore thread status reg */ - rts - diff --git a/arch/m68k/68000/head.S b/arch/m68k/68000/head.S deleted file mode 100644 index 140220662e33e9932e6d8442f0346ec74b48138c..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/head.S +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * head.S - Common startup code for 68000 core based CPU's - * - * 2012.10.21, Luis Alves , Single head.S file for all - * 68000 core based CPU's. Based on the sources from: - * Coldfire by Greg Ungerer - * 68328 by D. Jeff Dionne , - * Kenneth Albanowski , - * The Silver Hammer Group, Ltd. - * - */ - -#include -#include -#include -#include - - -/***************************************************************************** - * UCSIMM and UCDIMM use CONFIG_MEMORY_RESERVE to reserve some RAM - *****************************************************************************/ -#ifdef CONFIG_MEMORY_RESERVE -#define RAMEND (CONFIG_RAMBASE+CONFIG_RAMSIZE)-(CONFIG_MEMORY_RESERVE*0x100000) -#else -#define RAMEND (CONFIG_RAMBASE+CONFIG_RAMSIZE) -#endif -/*****************************************************************************/ - -.global _start -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend - -#if defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD) -.global bootlogo_bits -#endif - -/* Defining DEBUG_HEAD_CODE, serial port in 68x328 is inited */ -/* #define DEBUG_HEAD_CODE */ -#undef DEBUG_HEAD_CODE - -.data - -/***************************************************************************** - * RAM setup pointers. Used by the kernel to determine RAM location and size. 
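Editor's note: the inthandler1..7 stubs above all follow one pattern: save the interrupt frame, push their own fixed vector number (65 through 71) plus the register frame, and call the common C dispatcher. A sketch of the shared shape; the process_int signature here is illustrative:

    struct pt_regs;
    extern void process_int(unsigned long vec, struct pt_regs *fp);

    /* Common body of inthandlerN: each stub hard-codes its vector
     * (inthandler1 pushes 65, ..., inthandler7 pushes 71). */
    static void inthandler_n(unsigned long vec, struct pt_regs *fp)
    {
        process_int(vec, fp);
    }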
- *****************************************************************************/ - -_rambase: - .long 0 -_ramvec: - .long 0 -_ramstart: - .long 0 -_ramend: - .long 0 - -__HEAD - -/***************************************************************************** - * Entry point, where all begins! - *****************************************************************************/ - -_start: - -/* Pilot need this specific signature at the start of ROM */ -#ifdef CONFIG_PILOT - .byte 0x4e, 0xfa, 0x00, 0x0a /* bra opcode (jmp 10 bytes) */ - .byte 'b', 'o', 'o', 't' - .word 10000 - nop - moveq #0, %d0 - movew %d0, 0xfffff618 /* Watchdog off */ - movel #0x00011f07, 0xfffff114 /* CS A1 Mask */ -#endif /* CONFIG_PILOT */ - - movew #0x2700, %sr /* disable all interrupts */ - -/***************************************************************************** - * Setup PLL and wait for it to settle (in 68x328 cpu's). - * Also, if enabled, init serial port. - *****************************************************************************/ -#if defined(CONFIG_M68328) || \ - defined(CONFIG_M68EZ328) || \ - defined(CONFIG_M68VZ328) - -/* Serial port setup. Should only be needed if debugging this startup code. */ -#ifdef DEBUG_HEAD_CODE - movew #0x0800, 0xfffff906 /* Ignore CTS */ - movew #0x010b, 0xfffff902 /* BAUD to 9600 */ - movew #0xe100, 0xfffff900 /* enable */ -#endif /* DEBUG_HEAD */ - -#ifdef CONFIG_PILOT - movew #0x2410, 0xfffff200 /* PLLCR */ -#else - movew #0x2400, 0xfffff200 /* PLLCR */ -#endif - movew #0x0123, 0xfffff202 /* PLLFSR */ - moveq #0, %d0 - movew #16384, %d0 /* PLL settle wait loop */ -_pll_settle: - subw #1, %d0 - bne _pll_settle -#endif /* CONFIG_M68x328 */ - - -/***************************************************************************** - * If running kernel from ROM some specific initialization has to be done. - * (Assuming that everything is already init'ed when running from RAM) - *****************************************************************************/ -#ifdef CONFIG_ROMKERNEL - -/***************************************************************************** - * Init chip registers (uCsimm specific) - *****************************************************************************/ -#ifdef CONFIG_UCSIMM - moveb #0x00, 0xfffffb0b /* Watchdog off */ - moveb #0x10, 0xfffff000 /* SCR */ - moveb #0x00, 0xfffff40b /* enable chip select */ - moveb #0x00, 0xfffff423 /* enable /DWE */ - moveb #0x08, 0xfffffd0d /* disable hardmap */ - moveb #0x07, 0xfffffd0e /* level 7 interrupt clear */ - movew #0x8600, 0xfffff100 /* FLASH at 0x10c00000 */ - movew #0x018b, 0xfffff110 /* 2Meg, enable, 0ws */ - movew #0x8f00, 0xfffffc00 /* DRAM configuration */ - movew #0x9667, 0xfffffc02 /* DRAM control */ - movew #0x0000, 0xfffff106 /* DRAM at 0x00000000 */ - movew #0x068f, 0xfffff116 /* 8Meg, enable, 0ws */ - moveb #0x40, 0xfffff300 /* IVR */ - movel #0x007FFFFF, %d0 /* IMR */ - movel %d0, 0xfffff304 - moveb 0xfffff42b, %d0 - andb #0xe0, %d0 - moveb %d0, 0xfffff42b -#endif - -/***************************************************************************** - * Init LCD controller. 
- * (Assuming that LCD controller is already init'ed when running from RAM) - *****************************************************************************/ -#ifdef CONFIG_INIT_LCD -#ifdef CONFIG_PILOT - moveb #0, 0xfffffA27 /* LCKCON */ - movel #_start, 0xfffffA00 /* LSSA */ - moveb #0xa, 0xfffffA05 /* LVPW */ - movew #0x9f, 0xFFFFFa08 /* LXMAX */ - movew #0x9f, 0xFFFFFa0a /* LYMAX */ - moveb #9, 0xfffffa29 /* LBAR */ - moveb #0, 0xfffffa25 /* LPXCD */ - moveb #0x04, 0xFFFFFa20 /* LPICF */ - moveb #0x58, 0xfffffA27 /* LCKCON */ - moveb #0x85, 0xfffff429 /* PFDATA */ - moveb #0xd8, 0xfffffA27 /* LCKCON */ - moveb #0xc5, 0xfffff429 /* PFDATA */ - moveb #0xd5, 0xfffff429 /* PFDATA */ - movel #bootlogo_bits, 0xFFFFFA00 /* LSSA */ - moveb #10, 0xFFFFFA05 /* LVPW */ - movew #160, 0xFFFFFA08 /* LXMAX */ - movew #160, 0xFFFFFA0A /* LYMAX */ -#else /* CONFIG_PILOT */ - movel #bootlogo_bits, 0xfffffA00 /* LSSA */ - moveb #0x28, 0xfffffA05 /* LVPW */ - movew #0x280, 0xFFFFFa08 /* LXMAX */ - movew #0x1df, 0xFFFFFa0a /* LYMAX */ - moveb #0, 0xfffffa29 /* LBAR */ - moveb #0, 0xfffffa25 /* LPXCD */ - moveb #0x08, 0xFFFFFa20 /* LPICF */ - moveb #0x01, 0xFFFFFA21 /* -ve pol */ - moveb #0x81, 0xfffffA27 /* LCKCON */ - movew #0xff00, 0xfffff412 /* LCD pins */ -#endif /* CONFIG_PILOT */ -#endif /* CONFIG_INIT_LCD */ - -/***************************************************************************** - * Kernel is running from FLASH/ROM (XIP) - * Copy init text & data to RAM - *****************************************************************************/ - moveal #_etext, %a0 - moveal #_sdata, %a1 - moveal #__bss_start, %a2 -_copy_initmem: - movel %a0@+, %a1@+ - cmpal %a1, %a2 - bhi _copy_initmem -#endif /* CONFIG_ROMKERNEL */ - -/***************************************************************************** - * Setup basic memory information for kernel - *****************************************************************************/ - movel #CONFIG_VECTORBASE,_ramvec /* set vector base location */ - movel #CONFIG_RAMBASE,_rambase /* set the base of RAM */ - movel #RAMEND, _ramend /* set end ram addr */ - lea __bss_stop,%a1 - movel %a1,_ramstart - -/***************************************************************************** - * If the kernel is in RAM, move romfs to right above bss and - * adjust _ramstart to where romfs ends. 
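Editor's note: a rough C rendering of the ROMFS move described here (the asm follows below). The filesystem image sits at __bss_start and must end up just past __bss_stop; the regions overlap, so the copy runs backwards, last word first. The size comes from offset 8 of the ROMFS header (m68k is big-endian, matching the header's byte order):

    #include <stdint.h>

    extern char __bss_start[], __bss_stop[];

    /* Sketch of _copy_romfs: returns the new _ramstart. */
    static char *move_romfs(void)
    {
        uint32_t size = *(uint32_t *)(__bss_start + 8); /* ROMFS size */
        size = (size + 8) & ~3u;             /* allow rounding, whole words */
        uint32_t *src = (uint32_t *)(__bss_start + size);
        uint32_t *dst = (uint32_t *)(__bss_stop + size);
        while ((char *)src > __bss_start)
            *--dst = *--src;                 /* copy dword, end first */
        return (char *)(__bss_stop + size);  /* new start of free RAM */
    }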
- * - * (Do this only if CONFIG_MTD_UCLINUX is true) - *****************************************************************************/ - -#if defined(CONFIG_ROMFS_FS) && defined(CONFIG_RAMKERNEL) && \ - defined(CONFIG_MTD_UCLINUX) - lea __bss_start, %a0 /* get start of bss */ - lea __bss_stop, %a1 /* set up destination */ - movel %a0, %a2 /* copy of bss start */ - - movel 8(%a0), %d0 /* get size of ROMFS */ - addql #8, %d0 /* allow for rounding */ - andl #0xfffffffc, %d0 /* whole words */ - - addl %d0, %a0 /* copy from end */ - addl %d0, %a1 /* copy from end */ - movel %a1, _ramstart /* set start of ram */ -_copy_romfs: - movel -(%a0), -(%a1) /* copy dword */ - cmpl %a0, %a2 /* check if at end */ - bne _copy_romfs -#endif /* CONFIG_ROMFS_FS && CONFIG_RAMKERNEL && CONFIG_MTD_UCLINUX */ - -/***************************************************************************** - * Clear bss region - *****************************************************************************/ - lea __bss_start, %a0 /* get start of bss */ - lea __bss_stop, %a1 /* get end of bss */ -_clear_bss: - movel #0, (%a0)+ /* clear each word */ - cmpl %a0, %a1 /* check if at end */ - bne _clear_bss - -/***************************************************************************** - * Load the current task pointer and stack. - *****************************************************************************/ - lea init_thread_union,%a0 - lea THREAD_SIZE(%a0),%sp - jsr start_kernel /* start Linux kernel */ -_exit: - jmp _exit /* should never get here */ diff --git a/arch/m68k/68000/romvec.S b/arch/m68k/68000/romvec.S deleted file mode 100644 index 15c70cd6453fa2a995334e4b45d21fe62c314b66..0000000000000000000000000000000000000000 --- a/arch/m68k/68000/romvec.S +++ /dev/null @@ -1,35 +0,0 @@ -/* - * romvec.S - Vector table for 68000 cpus - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - * - * Copyright 1996 Roman Zippel - * Copyright 1999 D. Jeff Dionne - * Copyright 2006 Greg Ungerer - */ - -.global _start -.global _buserr -.global trap -.global system_call - -.section .romvec - -e_vectors: -.long CONFIG_RAMBASE+CONFIG_RAMSIZE-4, _start, buserr, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -/* TRAP #0-15 */ -.long system_call, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long trap, trap, trap, trap -.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S deleted file mode 100644 index 52d312d5b4d4f68337aa8f9b711f50b1a1ad9eda..0000000000000000000000000000000000000000 --- a/arch/m68k/coldfire/entry.S +++ /dev/null @@ -1,203 +0,0 @@ -/* - * entry.S -- interrupt and exception processing for ColdFire - * - * Copyright (C) 1999-2007, Greg Ungerer (gerg@snapgear.com) - * Copyright (C) 1998 D. Jeff Dionne , - * Kenneth Albanowski , - * Copyright (C) 2000 Lineo Inc. (www.lineo.com) - * Copyright (C) 2004-2006 Macq Electronique SA. (www.macqel.com) - * - * Based on: - * - * linux/arch/m68k/kernel/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file README.legal in the main directory of this archive - * for more details. 
- * - * Linux/m68k support by Hamish Macdonald - * - * 68060 fixes by Jesper Skov - * ColdFire support by Greg Ungerer (gerg@snapgear.com) - * 5307 fixes by David W. Miller - * linux 2.4 support David McCullough - * Bug, speed and maintainability fixes by Philippe De Muyter - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_COLDFIRE_SW_A7 -/* - * Define software copies of the supervisor and user stack pointers. - */ -.bss -sw_ksp: -.long 0 -sw_usp: -.long 0 -#endif /* CONFIG_COLDFIRE_SW_A7 */ - -.text - -.globl system_call -.globl resume -.globl ret_from_exception -.globl ret_from_signal -.globl sys_call_table -.globl inthandler - -enosys: - mov.l #sys_ni_syscall,%d3 - bra 1f - -ENTRY(system_call) - SAVE_ALL_SYS - move #0x2000,%sr /* enable intrs again */ - GET_CURRENT(%d2) - - cmpl #NR_syscalls,%d0 - jcc enosys - lea sys_call_table,%a0 - lsll #2,%d0 /* movel %a0@(%d0:l:4),%d3 */ - movel %a0@(%d0),%d3 - jeq enosys - -1: - movel %sp,%d2 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d2 /* at start of kernel stack */ - movel %d2,%a0 - movel %a0@,%a1 /* save top of frame */ - movel %sp,%a1@(TASK_THREAD+THREAD_ESP0) - btst #(TIF_SYSCALL_TRACE%8),%a0@(TINFO_FLAGS+(31-TIF_SYSCALL_TRACE)/8) - bnes 1f - - movel %d3,%a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - jra ret_from_exception -1: - movel #-ENOSYS,%d2 /* strace needs -ENOSYS in PT_OFF_D0 */ - movel %d2,PT_OFF_D0(%sp) /* on syscall entry */ - subql #4,%sp - SAVE_SWITCH_STACK - jbsr syscall_trace_enter - RESTORE_SWITCH_STACK - addql #4,%sp - movel %d3,%a0 - jbsr %a0@ - movel %d0,%sp@(PT_OFF_D0) /* save the return value */ - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - jbsr syscall_trace_leave - -ret_from_signal: - RESTORE_SWITCH_STACK - addql #4,%sp - -ret_from_exception: - move #0x2700,%sr /* disable intrs */ - btst #5,%sp@(PT_OFF_SR) /* check if returning to kernel */ - jeq Luser_return /* if so, skip resched, signals */ - -#ifdef CONFIG_PREEMPT - movel %sp,%d1 /* get thread_info pointer */ - andl #-THREAD_SIZE,%d1 /* at base of kernel stack */ - movel %d1,%a0 - movel %a0@(TINFO_FLAGS),%d1 /* get thread_info->flags */ - andl #(1<flags (low 8 bits) */ - jne Lwork_to_do /* still work to do */ - -Lreturn: - RESTORE_USER - -Lwork_to_do: - movel %a0@(TINFO_FLAGS),%d1 /* get thread_info->flags */ - move #0x2000,%sr /* enable intrs again */ - btst #TIF_NEED_RESCHED,%d1 - jne reschedule - -Lsignal_return: - subql #4,%sp /* dummy return address */ - SAVE_SWITCH_STACK - pea %sp@(SWITCH_STACK_SIZE) - jsr do_notify_resume - addql #4,%sp - RESTORE_SWITCH_STACK - addql #4,%sp - jmp Luser_return - -/* - * This is the generic interrupt handler (for all hardware interrupt - * sources). Calls up to high level code to do all the work. - */ -ENTRY(inthandler) - SAVE_ALL_INT - GET_CURRENT(%d2) - - movew %sp@(PT_OFF_FORMATVEC),%d0 /* put exception # in d0 */ - andl #0x03fc,%d0 /* mask out vector only */ - - movel %sp,%sp@- /* push regs arg */ - lsrl #2,%d0 /* calculate real vector # */ - movel %d0,%sp@- /* push vector number */ - jbsr do_IRQ /* call high level irq handler */ - lea %sp@(8),%sp /* pop args off stack */ - - bra ret_from_exception - -/* - * Beware - when entering resume, prev (the current task) is - * in a0, next (the new task) is in a1, so don't change these - * registers until their contents are no longer needed. 
- */ -ENTRY(resume) - movew %sr,%d1 /* save current status */ - movew %d1,%a0@(TASK_THREAD+THREAD_SR) - movel %a0,%d1 /* get prev thread in d1 */ - SAVE_SWITCH_STACK - movel %sp,%a0@(TASK_THREAD+THREAD_KSP) /* save kernel stack pointer */ - RDUSP /* movel %usp,%a3 */ - movel %a3,%a0@(TASK_THREAD+THREAD_USP) /* save thread user stack */ -#ifdef CONFIG_MMU - movel %a1,%a2 /* set new current */ -#endif - movel %a1@(TASK_THREAD+THREAD_USP),%a3 /* restore thread user stack */ - WRUSP /* movel %a3,%usp */ - movel %a1@(TASK_THREAD+THREAD_KSP),%sp /* restore new kernel stack */ - movew %a1@(TASK_THREAD+THREAD_SR),%d7 /* restore new status */ - movew %d7,%sr - RESTORE_SWITCH_STACK - rts - diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S deleted file mode 100644 index c6d7fd28c60237f1a05659f3527efe1d81ca2887..0000000000000000000000000000000000000000 --- a/arch/m68k/coldfire/head.S +++ /dev/null @@ -1,299 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/*****************************************************************************/ - -/* - * head.S -- common startup code for ColdFire CPUs. - * - * (C) Copyright 1999-2011, Greg Ungerer . - */ - -/*****************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include - -/*****************************************************************************/ - -/* - * If we don't have a fixed memory size, then lets build in code - * to auto detect the DRAM size. Obviously this is the preferred - * method, and should work for most boards. It won't work for those - * that do not have their RAM starting at address 0, and it only - * works on SDRAM (not boards fitted with SRAM). - */ -#if CONFIG_RAMSIZE != 0 -.macro GET_MEM_SIZE - movel #CONFIG_RAMSIZE,%d0 /* hard coded memory size */ -.endm - -#elif defined(CONFIG_M5206) || defined(CONFIG_M5206e) || \ - defined(CONFIG_M5249) || defined(CONFIG_M525x) || \ - defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ - defined(CONFIG_M5307) || defined(CONFIG_M5407) -/* - * Not all these devices have exactly the same DRAM controller, - * but the DCMR register is virtually identical - give or take - * a couple of bits. The only exception is the 5272 devices, their - * DRAM controller is quite different. 
- */ -.macro GET_MEM_SIZE - movel MCFSIM_DMR0,%d0 /* get mask for 1st bank */ - btst #0,%d0 /* check if region enabled */ - beq 1f - andl #0xfffc0000,%d0 - beq 1f - addl #0x00040000,%d0 /* convert mask to size */ -1: - movel MCFSIM_DMR1,%d1 /* get mask for 2nd bank */ - btst #0,%d1 /* check if region enabled */ - beq 2f - andl #0xfffc0000,%d1 - beq 2f - addl #0x00040000,%d1 - addl %d1,%d0 /* total mem size in d0 */ -2: -.endm - -#elif defined(CONFIG_M5272) -.macro GET_MEM_SIZE - movel MCFSIM_CSOR7,%d0 /* get SDRAM address mask */ - andil #0xfffff000,%d0 /* mask out chip select options */ - negl %d0 /* negate bits */ -.endm - -#elif defined(CONFIG_M520x) -.macro GET_MEM_SIZE - clrl %d0 - movel MCFSIM_SDCS0, %d2 /* Get SDRAM chip select 0 config */ - andl #0x1f, %d2 /* Get only the chip select size */ - beq 3f /* Check if it is enabled */ - addql #1, %d2 /* Form exponent */ - moveql #1, %d0 - lsll %d2, %d0 /* 2 ^ exponent */ -3: - movel MCFSIM_SDCS1, %d2 /* Get SDRAM chip select 1 config */ - andl #0x1f, %d2 /* Get only the chip select size */ - beq 4f /* Check if it is enabled */ - addql #1, %d2 /* Form exponent */ - moveql #1, %d1 - lsll %d2, %d1 /* 2 ^ exponent */ - addl %d1, %d0 /* Total size of SDRAM in d0 */ -4: -.endm - -#else -#error "ERROR: I don't know how to probe your boards memory size?" -#endif - -/*****************************************************************************/ - -/* - * Boards and platforms can do specific early hardware setup if - * they need to. Most don't need this, define away if not required. - */ -#ifndef PLATFORM_SETUP -#define PLATFORM_SETUP -#endif - -/*****************************************************************************/ - -.global _start -.global _rambase -.global _ramvec -.global _ramstart -.global _ramend -#if defined(CONFIG_UBOOT) -.global _init_sp -#endif - -/*****************************************************************************/ - -.data - -/* - * During startup we store away the RAM setup. These are not in the - * bss, since their values are determined and written before the bss - * has been cleared. - */ -_rambase: -.long 0 -_ramvec: -.long 0 -_ramstart: -.long 0 -_ramend: -.long 0 -#if defined(CONFIG_UBOOT) -_init_sp: -.long 0 -#endif - -/*****************************************************************************/ - -__HEAD - -#ifdef CONFIG_MMU -_start0: - jmp _start -.global kernel_pg_dir -.equ kernel_pg_dir,_start0 -.equ .,_start0+0x1000 -#endif - -/* - * This is the codes first entry point. This is where it all - * begins... - */ - -_start: - nop /* filler */ - movew #0x2700, %sr /* no interrupts */ - movel #CACHE_INIT,%d0 /* disable cache */ - movec %d0,%CACR - nop -#if defined(CONFIG_UBOOT) - movel %sp,_init_sp /* save initial stack pointer */ -#endif -#ifdef CONFIG_MBAR - movel #CONFIG_MBAR+1,%d0 /* configured MBAR address */ - movec %d0,%MBAR /* set it */ -#endif - - /* - * Do any platform or board specific setup now. Most boards - * don't need anything. Those exceptions are define this in - * their board specific includes. - */ - PLATFORM_SETUP - - /* - * Create basic memory configuration. Set VBR accordingly, - * and size memory. - */ - movel #CONFIG_VECTORBASE,%a7 - movec %a7,%VBR /* set vectors addr */ - movel %a7,_ramvec - - movel #CONFIG_RAMBASE,%a7 /* mark the base of RAM */ - movel %a7,_rambase - - GET_MEM_SIZE /* macro code determines size */ - addl %a7,%d0 - movel %d0,_ramend /* set end ram addr */ - - /* - * Now that we know what the memory is, lets enable cache - * and get things moving. 
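Editor's note: the DCMR-style GET_MEM_SIZE variant above converts each bank's address-mask register into a size. A C sketch of the per-bank arithmetic; the register values would be read from MCFSIM_DMR0/DMR1, which is omitted here:

    #include <stdint.h>

    /* Sketch of one bank of GET_MEM_SIZE: bit 0 enables the bank,
     * the upper bits are an address mask with 256K granularity;
     * mask + one granule gives the bank size. */
    static uint32_t bank_size(uint32_t dmr)
    {
        if (!(dmr & 1))                     /* region not enabled */
            return 0;
        dmr &= 0xfffc0000;                  /* keep the mask bits */
        return dmr ? dmr + 0x00040000 : 0;  /* convert mask to size */
    }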
This is Coldfire CPU specific. Not - * all version cores have identical cache register setup. But - * it is very similar. Define the exact settings in the headers - * then the code here is the same for all. - */ - movel #ACR0_MODE,%d0 /* set RAM region for caching */ - movec %d0,%ACR0 - movel #ACR1_MODE,%d0 /* anything else to cache? */ - movec %d0,%ACR1 -#ifdef ACR2_MODE - movel #ACR2_MODE,%d0 - movec %d0,%ACR2 - movel #ACR3_MODE,%d0 - movec %d0,%ACR3 -#endif - movel #CACHE_MODE,%d0 /* enable cache */ - movec %d0,%CACR - nop - -#ifdef CONFIG_MMU - /* - * Identity mapping for the kernel region. - */ - movel #(MMUBASE+1),%d0 /* enable MMUBAR registers */ - movec %d0,%MMUBAR - movel #MMUOR_CA,%d0 /* clear TLB entries */ - movel %d0,MMUOR - movel #0,%d0 /* set ASID to 0 */ - movec %d0,%asid - - movel #MMUCR_EN,%d0 /* Enable the identity map */ - movel %d0,MMUCR - nop /* sync i-pipeline */ - - movel #_vstart,%a0 /* jump to "virtual" space */ - jmp %a0@ -_vstart: -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_ROMFS_FS - /* - * Move ROM filesystem above bss :-) - */ - lea __bss_start,%a0 /* get start of bss */ - lea __bss_stop,%a1 /* set up destination */ - movel %a0,%a2 /* copy of bss start */ - - movel 8(%a0),%d0 /* get size of ROMFS */ - addql #8,%d0 /* allow for rounding */ - andl #0xfffffffc, %d0 /* whole words */ - - addl %d0,%a0 /* copy from end */ - addl %d0,%a1 /* copy from end */ - movel %a1,_ramstart /* set start of ram */ - -_copy_romfs: - movel -(%a0),%d0 /* copy dword */ - movel %d0,-(%a1) - cmpl %a0,%a2 /* check if at end */ - bne _copy_romfs - -#else /* CONFIG_ROMFS_FS */ - lea __bss_stop,%a1 - movel %a1,_ramstart -#endif /* CONFIG_ROMFS_FS */ - - - /* - * Zero out the bss region. - */ - lea __bss_start,%a0 /* get start of bss */ - lea __bss_stop,%a1 /* get end of bss */ - clrl %d0 /* set value */ -_clear_bss: - movel %d0,(%a0)+ /* clear each word */ - cmpl %a0,%a1 /* check if at end */ - bne _clear_bss - - /* - * Load the current task pointer and stack. - */ - lea init_thread_union,%a0 - lea THREAD_SIZE(%a0),%sp - -#ifdef CONFIG_MMU -.global m68k_cputype -.global m68k_mmutype -.global m68k_fputype -.global m68k_machtype - movel #CPU_COLDFIRE,%d0 - movel %d0,m68k_cputype /* Mark us as a ColdFire */ - movel #MMU_COLDFIRE,%d0 - movel %d0,m68k_mmutype - movel #FPUTYPE,%d0 - movel %d0,m68k_fputype /* Mark FPU type */ - movel #MACHINE,%d0 - movel %d0,m68k_machtype /* Mark machine type */ - lea init_task,%a2 /* Set "current" init task */ -#endif - - /* - * Assembler start up done, start code proper. - */ - jsr start_kernel /* start Linux kernel */ - -_exit: - jmp _exit /* should never get here */ - -/*****************************************************************************/ diff --git a/arch/m68k/fpsp040/bindec.S b/arch/m68k/fpsp040/bindec.S deleted file mode 100644 index f2e795231046049bd41950bd642178f81065c819..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/bindec.S +++ /dev/null @@ -1,919 +0,0 @@ -| -| bindec.sa 3.4 1/3/91 -| -| bindec -| -| Description: -| Converts an input in extended precision format -| to bcd format. -| -| Input: -| a0 points to the input extended precision value -| value in memory; d0 contains the k-factor sign-extended -| to 32-bits. The input may be either normalized, -| unnormalized, or denormalized. -| -| Output: result in the FP_SCR1 space on the stack. -| -| Saves and Modifies: D2-D7,A2,FP2 -| -| Algorithm: -| -| A1. Set RM and size ext; Set SIGMA = sign of input. -| The k-factor is saved for use in d7. 
Clear the -| BINDEC_FLG for separating normalized/denormalized -| input. If input is unnormalized or denormalized, -| normalize it. -| -| A2. Set X = abs(input). -| -| A3. Compute ILOG. -| ILOG is the log base 10 of the input value. It is -| approximated by adding e + 0.f when the original -| value is viewed as 2^^e * 1.f in extended precision. -| This value is stored in d6. -| -| A4. Clr INEX bit. -| The operation in A3 above may have set INEX2. -| -| A5. Set ICTR = 0; -| ICTR is a flag used in A13. It must be set before the -| loop entry A6. -| -| A6. Calculate LEN. -| LEN is the number of digits to be displayed. The -| k-factor can dictate either the total number of digits, -| if it is a positive number, or the number of digits -| after the decimal point which are to be included as -| significant. See the 68882 manual for examples. -| If LEN is computed to be greater than 17, set OPERR in -| USER_FPSR. LEN is stored in d4. -| -| A7. Calculate SCALE. -| SCALE is equal to 10^ISCALE, where ISCALE is the number -| of decimal places needed to insure LEN integer digits -| in the output before conversion to bcd. LAMBDA is the -| sign of ISCALE, used in A9. Fp1 contains -| 10^^(abs(ISCALE)) using a rounding mode which is a -| function of the original rounding mode and the signs -| of ISCALE and X. A table is given in the code. -| -| A8. Clr INEX; Force RZ. -| The operation in A3 above may have set INEX2. -| RZ mode is forced for the scaling operation to insure -| only one rounding error. The grs bits are collected in -| the INEX flag for use in A10. -| -| A9. Scale X -> Y. -| The mantissa is scaled to the desired number of -| significant digits. The excess digits are collected -| in INEX2. -| -| A10. Or in INEX. -| If INEX is set, round error occurred. This is -| compensated for by 'or-ing' in the INEX2 flag to -| the lsb of Y. -| -| A11. Restore original FPCR; set size ext. -| Perform FINT operation in the user's rounding mode. -| Keep the size to extended. -| -| A12. Calculate YINT = FINT(Y) according to user's rounding -| mode. The FPSP routine sintd0 is used. The output -| is in fp0. -| -| A13. Check for LEN digits. -| If the int operation results in more than LEN digits, -| or less than LEN -1 digits, adjust ILOG and repeat from -| A6. This test occurs only on the first pass. If the -| result is exactly 10^LEN, decrement ILOG and divide -| the mantissa by 10. -| -| A14. Convert the mantissa to bcd. -| The binstr routine is used to convert the LEN digit -| mantissa to bcd in memory. The input to binstr is -| to be a fraction; i.e. (mantissa)/10^LEN and adjusted -| such that the decimal point is to the left of bit 63. -| The bcd digits are stored in the correct position in -| the final string area in memory. -| -| A15. Convert the exponent to bcd. -| As in A14 above, the exp is converted to bcd and the -| digits are stored in the final string. -| Test the length of the final exponent string. If the -| length is 4, set operr. -| -| A16. Write sign bits to final string. -| -| Implementation Notes: -| -| The registers are used as follows: -| -| d0: scratch; LEN input to binstr -| d1: scratch -| d2: upper 32-bits of mantissa for binstr -| d3: scratch;lower 32-bits of mantissa for binstr -| d4: LEN -| d5: LAMBDA/ICTR -| d6: ILOG -| d7: k-factor -| a0: ptr for original operand/final result -| a1: scratch pointer -| a2: pointer to FP_X; abs(original value) in ext -| fp0: scratch -| fp1: scratch -| fp2: scratch -| F_SCR1: -| F_SCR2: -| L_SCR1: -| L_SCR2: - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BINDEC idnt 2,1 | Motorola 040 Floating Point Software Package - -#include "fpsp.h" - - |section 8 - -| Constants in extended precision -LOG2: .long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 -LOG2UP1: .long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 - -| Constants in single precision -FONE: .long 0x3F800000,0x00000000,0x00000000,0x00000000 -FTWO: .long 0x40000000,0x00000000,0x00000000,0x00000000 -FTEN: .long 0x41200000,0x00000000,0x00000000,0x00000000 -F4933: .long 0x459A2800,0x00000000,0x00000000,0x00000000 - -RBDTBL: .byte 0,0,0,0 - .byte 3,3,2,2 - .byte 3,2,2,3 - .byte 2,3,3,2 - - |xref binstr - |xref sintdo - |xref ptenrn,ptenrm,ptenrp - - .global bindec - .global sc_mul -bindec: - moveml %d2-%d7/%a2,-(%a7) - fmovemx %fp0-%fp2,-(%a7) - -| A1. Set RM and size ext. Set SIGMA = sign input; -| The k-factor is saved for use in d7. Clear BINDEC_FLG for -| separating normalized/denormalized input. If the input -| is a denormalized number, set the BINDEC_FLG memory word -| to signal denorm. If the input is unnormalized, normalize -| the input and test for denormalized result. -| - fmovel #rm_mode,%FPCR |set RM and ext - movel (%a0),L_SCR2(%a6) |save exponent for sign check - movel %d0,%d7 |move k-factor to d7 - clrb BINDEC_FLG(%a6) |clr norm/denorm flag - movew STAG(%a6),%d0 |get stag - andiw #0xe000,%d0 |isolate stag bits - beq A2_str |if zero, input is norm -| -| Normalize the denorm -| -un_de_norm: - movew (%a0),%d0 - andiw #0x7fff,%d0 |strip sign of normalized exp - movel 4(%a0),%d1 - movel 8(%a0),%d2 -norm_loop: - subw #1,%d0 - lsll #1,%d2 - roxll #1,%d1 - tstl %d1 - bges norm_loop -| -| Test if the normalized input is denormalized -| - tstw %d0 - bgts pos_exp |if greater than zero, it is a norm - st BINDEC_FLG(%a6) |set flag for denorm -pos_exp: - andiw #0x7fff,%d0 |strip sign of normalized exp - movew %d0,(%a0) - movel %d1,4(%a0) - movel %d2,8(%a0) - -| A2. Set X = abs(input). -| -A2_str: - movel (%a0),FP_SCR2(%a6) | move input to work space - movel 4(%a0),FP_SCR2+4(%a6) | move input to work space - movel 8(%a0),FP_SCR2+8(%a6) | move input to work space - andil #0x7fffffff,FP_SCR2(%a6) |create abs(X) - -| A3. Compute ILOG. -| ILOG is the log base 10 of the input value. It is approx- -| imated by adding e + 0.f when the original value is viewed -| as 2^^e * 1.f in extended precision. This value is stored -| in d6. -| -| Register usage: -| Input/Output -| d0: k-factor/exponent -| d2: x/x -| d3: x/x -| d4: x/x -| d5: x/x -| d6: x/ILOG -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/x -| a2: x/x -| fp0: x/float(ILOG) -| fp1: x/x -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X)/Abs(X) with $3fff exponent -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - - tstb BINDEC_FLG(%a6) |check for denorm - beqs A3_cont |if clr, continue with norm - movel #-4933,%d6 |force ILOG = -4933 - bras A4_str -A3_cont: - movew FP_SCR2(%a6),%d0 |move exp to d0 - movew #0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff - fmovex FP_SCR2(%a6),%fp0 |now fp0 has 1.f - subw #0x3fff,%d0 |strip off bias - faddw %d0,%fp0 |add in exp - fsubs FONE,%fp0 |subtract off 1.0 - fbge pos_res |if pos, branch - fmulx LOG2UP1,%fp0 |if neg, mul by LOG2UP1 - fmovel %fp0,%d6 |put ILOG in d6 as a lword - bras A4_str |go move out ILOG -pos_res: - fmulx LOG2,%fp0 |if pos, mul by LOG2 - fmovel %fp0,%d6 |put ILOG in d6 as a lword - - -| A4. Clr INEX bit. 
-| The operation in A3 above may have set INEX2. - -A4_str: - fmovel #0,%FPSR |zero all of fpsr - nothing needed - - -| A5. Set ICTR = 0; -| ICTR is a flag used in A13. It must be set before the -| loop entry A6. The lower word of d5 is used for ICTR. - - clrw %d5 |clear ICTR - - -| A6. Calculate LEN. -| LEN is the number of digits to be displayed. The k-factor -| can dictate either the total number of digits, if it is -| a positive number, or the number of digits after the -| original decimal point which are to be included as -| significant. See the 68882 manual for examples. -| If LEN is computed to be greater than 17, set OPERR in -| USER_FPSR. LEN is stored in d4. -| -| Register usage: -| Input/Output -| d0: exponent/Unchanged -| d2: x/x/scratch -| d3: x/x -| d4: exc picture/LEN -| d5: ICTR/Unchanged -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/x -| a2: x/x -| fp0: float(ILOG)/Unchanged -| fp1: x/x -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A6_str: - tstl %d7 |branch on sign of k - bles k_neg |if k <= 0, LEN = ILOG + 1 - k - movel %d7,%d4 |if k > 0, LEN = k - bras len_ck |skip to LEN check -k_neg: - movel %d6,%d4 |first load ILOG to d4 - subl %d7,%d4 |subtract off k - addql #1,%d4 |add in the 1 -len_ck: - tstl %d4 |LEN check: branch on sign of LEN - bles LEN_ng |if neg, set LEN = 1 - cmpl #17,%d4 |test if LEN > 17 - bles A7_str |if not, forget it - movel #17,%d4 |set max LEN = 17 - tstl %d7 |if negative, never set OPERR - bles A7_str |if positive, continue - orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR - bras A7_str |finished here -LEN_ng: - moveql #1,%d4 |min LEN is 1 - - -| A7. Calculate SCALE. -| SCALE is equal to 10^ISCALE, where ISCALE is the number -| of decimal places needed to insure LEN integer digits -| in the output before conversion to bcd. LAMBDA is the sign -| of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using -| the rounding mode as given in the following table (see -| Coonen, p. 7.23 as ref.; however, the SCALE variable is -| of opposite sign in bindec.sa from Coonen). 
-| -| Initial USE -| FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] -| ---------------------------------------------- -| RN 00 0 0 00/0 RN -| RN 00 0 1 00/0 RN -| RN 00 1 0 00/0 RN -| RN 00 1 1 00/0 RN -| RZ 01 0 0 11/3 RP -| RZ 01 0 1 11/3 RP -| RZ 01 1 0 10/2 RM -| RZ 01 1 1 10/2 RM -| RM 10 0 0 11/3 RP -| RM 10 0 1 10/2 RM -| RM 10 1 0 10/2 RM -| RM 10 1 1 11/3 RP -| RP 11 0 0 10/2 RM -| RP 11 0 1 11/3 RP -| RP 11 1 0 11/3 RP -| RP 11 1 1 10/2 RM -| -| Register usage: -| Input/Output -| d0: exponent/scratch - final is 0 -| d2: x/0 or 24 for A9 -| d3: x/scratch - offset ptr into PTENRM array -| d4: LEN/Unchanged -| d5: 0/ICTR:LAMBDA -| d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: x/ptr to PTENRM array -| a2: x/x -| fp0: float(ILOG)/Unchanged -| fp1: x/10^ISCALE -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A7_str: - tstl %d7 |test sign of k - bgts k_pos |if k > 0, skip this - cmpl %d6,%d7 |test k - ILOG - blts k_pos |if ILOG >= k, skip this - movel %d7,%d6 |if ((k<0) & (ILOG < k)) ILOG = k -k_pos: - movel %d6,%d0 |calc ILOG + 1 - LEN in d0 - addql #1,%d0 |add the 1 - subl %d4,%d0 |sub off LEN - swap %d5 |use upper word of d5 for LAMBDA - clrw %d5 |set it zero initially - clrw %d2 |set up d2 for very small case - tstl %d0 |test sign of ISCALE - bges iscale |if pos, skip next inst - addqw #1,%d5 |if neg, set LAMBDA true - cmpl #0xffffecd4,%d0 |test iscale <= -4908 - bgts no_inf |if false, skip rest - addil #24,%d0 |add in 24 to iscale - movel #24,%d2 |put 24 in d2 for A9 -no_inf: - negl %d0 |and take abs of ISCALE -iscale: - fmoves FONE,%fp1 |init fp1 to 1 - bfextu USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits - lslw #1,%d1 |put them in bits 2:1 - addw %d5,%d1 |add in LAMBDA - lslw #1,%d1 |put them in bits 3:1 - tstl L_SCR2(%a6) |test sign of original x - bges x_pos |if pos, don't set bit 0 - addql #1,%d1 |if neg, set bit 0 -x_pos: - leal RBDTBL,%a2 |load rbdtbl base - moveb (%a2,%d1),%d3 |load d3 with new rmode - lsll #4,%d3 |put bits in proper position - fmovel %d3,%fpcr |load bits into fpu - lsrl #4,%d3 |put bits in proper position - tstb %d3 |decode new rmode for pten table - bnes not_rn |if zero, it is RN - leal PTENRN,%a1 |load a1 with RN table base - bras rmode |exit decode -not_rn: - lsrb #1,%d3 |get lsb in carry - bccs not_rp |if carry clear, it is RM - leal PTENRP,%a1 |load a1 with RP table base - bras rmode |exit decode -not_rp: - leal PTENRM,%a1 |load a1 with RM table base -rmode: - clrl %d3 |clr table index -e_loop: - lsrl #1,%d0 |shift next bit into carry - bccs e_next |if zero, skip the mul - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -e_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if ISCALE is zero - bnes e_loop |if not, loop - - -| A8. Clr INEX; Force RZ. -| The operation in A3 above may have set INEX2. -| RZ mode is forced for the scaling operation to insure -| only one rounding error. The grs bits are collected in -| the INEX flag for use in A10. -| -| Register usage: -| Input/Output - - fmovel #0,%FPSR |clr INEX - fmovel #rz_mode,%FPCR |set RZ rounding mode - - -| A9. Scale X -> Y. -| The mantissa is scaled to the desired number of significant -| digits. The excess digits are collected in INEX2. If mul, -| Check d2 for excess 10 exponential value. If not zero, -| the iscale value would have caused the pwrten calculation -| to overflow. Only a negative iscale can cause this, so -| multiply by 10^(d2), which is now only allowed to be 24, -| with a multiply by 10^8 and 10^16, which is exact since -| 10^24 is exact. If the input was denormalized, we must -| create a busy stack frame with the mul command and the -| two operands, and allow the fpu to complete the multiply.
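Restated in C, the A6/A7 bookkeeping above is small: LEN from the k-factor and ILOG, ISCALE from ILOG and LEN, and the scale rounding mode from the sixteen-entry table. A sketch (names are illustrative; the ILOG-clamp special case for negative k is left to the caller):

#include <stdint.h>

/* A6: number of digits to produce.  OPERR is flagged only when a
 * positive k-factor asks for more than 17 digits. */
static int bindec_len(int k, int ilog, int *operr)
{
	int len = (k > 0) ? k : ilog + 1 - k;

	if (len < 1)
		len = 1;
	if (len > 17) {
		len = 17;
		if (k > 0)
			*operr = 1;
	}
	return len;
}

/* A7: decimal shift needed so that LEN integer digits remain. */
static int bindec_iscale(int ilog, int len)
{
	return ilog + 1 - len;
}

/* A7: scale rounding mode chosen from the FPCR mode, LAMBDA (the
 * sign of ISCALE) and sign(X); 0/2/3 select RN/RM/RP as tabulated. */
static const uint8_t rbdtbl[16] = {
	0, 0, 0, 0,
	3, 3, 2, 2,
	3, 2, 2, 3,
	2, 3, 3, 2,
};

static unsigned scale_rmode(unsigned fpcr_rm, int lambda, int x_neg)
{
	return rbdtbl[(fpcr_rm << 2) | (lambda << 1) | !!x_neg];
}

The table index is exactly the bit-packing the assembly builds in %d1: rounding mode in bits 3:2, LAMBDA in bit 1, the operand sign in bit 0.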
-| -| Register usage: -| Input/Output -| d0: FPCR with RZ mode/Unchanged -| d2: 0 or 24/unchanged -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: ptr to PTENRM array/Unchanged -| a2: x/x -| fp0: float(ILOG)/X adjusted for SCALE (Y) -| fp1: 10^ISCALE/Unchanged -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Abs(X) with $3fff exponent/Unchanged -| L_SCR1:x/x -| L_SCR2:first word of X packed/Unchanged - -A9_str: - fmovex (%a0),%fp0 |load X from memory - fabsx %fp0 |use abs(X) - tstw %d5 |LAMBDA is in lower word of d5 - bne sc_mul |if neg (LAMBDA = 1), scale by mul - fdivx %fp1,%fp0 |calculate X / SCALE -> Y to fp0 - bras A10_st |branch to A10 - -sc_mul: - tstb BINDEC_FLG(%a6) |check for denorm - beqs A9_norm |if norm, continue with mul - fmovemx %fp1-%fp1,-(%a7) |load ETEMP with 10^ISCALE - movel 8(%a0),-(%a7) |load FPTEMP with input arg - movel 4(%a0),-(%a7) - movel (%a0),-(%a7) - movel #18,%d3 |load count for busy stack -A9_loop: - clrl -(%a7) |clear lword on stack - dbf %d3,A9_loop - moveb VER_TMP(%a6),(%a7) |write current version number - moveb #BUSY_SIZE-4,1(%a7) |write current busy size - moveb #0x10,0x44(%a7) |set fcefpte[15] bit - movew #0x0023,0x40(%a7) |load cmdreg1b with mul command - moveb #0xfe,0x8(%a7) |load all 1s to cu savepc - frestore (%a7)+ |restore frame to fpu for completion - fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 - fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 - bras A10_st -A9_norm: - tstw %d2 |test for small exp case - beqs A9_con |if zero, continue as normal - fmulx 36(%a1),%fp0 |multiply fp0 by 10^8 - fmulx 48(%a1),%fp0 |multiply fp0 by 10^16 -A9_con: - fmulx %fp1,%fp0 |calculate X * SCALE -> Y to fp0 - - -| A10. Or in INEX. -| If INEX is set, round error occurred. This is compensated -| for by 'or-ing' in the INEX2 flag to the lsb of Y. -| -| Register usage: -| Input/Output -| d0: FPCR with RZ mode/FPSR with INEX2 isolated -| d2: x/x -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/final result -| a1: ptr to PTENxx array/Unchanged -| a2: x/ptr to FP_SCR2(a6) -| fp0: Y/Y with lsb adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: x/x - -A10_st: - fmovel %FPSR,%d0 |get FPSR - fmovex %fp0,FP_SCR2(%a6) |move Y to memory - leal FP_SCR2(%a6),%a2 |load a2 with ptr to FP_SCR2 - btstl #9,%d0 |check if INEX2 set - beqs A11_st |if clear, skip rest - oril #1,8(%a2) |or in 1 to lsb of mantissa - fmovex FP_SCR2(%a6),%fp0 |write adjusted Y back to fpu - - -| A11. Restore original FPCR; set size ext. -| Perform FINT operation in the user's rounding mode. Keep -| the size to extended. The sintdo entry point in the sint -| routine expects the FPCR value to be in USER_FPCR for -| mode and precision. The original FPCR is saved in L_SCR1. - -A11_st: - movel USER_FPCR(%a6),L_SCR1(%a6) |save it for later - andil #0x00000030,USER_FPCR(%a6) |set size to ext, -| ;block exceptions - - -| A12. Calculate YINT = FINT(Y) according to user's rounding mode. -| The FPSP routine sintd0 is used. The output is in fp0. 
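Stripped of the stack bookkeeping, A11 and A12 amount to a save/set/restore of the rounding mode around one round-to-integer. The portable C equivalent of that round-trip (a sketch only; the FPSP routes this through sintdo on its own frame):

#include <fenv.h>
#include <math.h>

/* A11/A12 sketch: YINT = FINT(Y) in the user's rounding mode,
 * with the previously active mode preserved. */
static double fint_user_mode(double y, int user_mode)
{
	int saved = fegetround();

	fesetround(user_mode);	/* e.g. FE_TONEAREST, FE_DOWNWARD */
	y = nearbyint(y);	/* round to integral, no inexact raised */
	fesetround(saved);
	return y;
}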
-| -| Register usage: -| Input/Output -| d0: FPSR with AINEX cleared/FPCR with size set to ext -| d2: x/x/scratch -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/Unchanged -| d6: ILOG/Unchanged -| d7: k-factor/Unchanged -| a0: ptr for original operand/src ptr for sintdo -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| a6: temp pointer to FP_SCR2(a6) - orig value saved and restored -| fp0: Y/YINT -| fp1: 10^ISCALE/Unchanged -| fp2: x/x -| F_SCR1:x/x -| F_SCR2:Y adjusted for inex/Y with original exponent -| L_SCR1:x/original USER_FPCR -| L_SCR2:first word of X packed/Unchanged - -A12_st: - moveml %d0-%d1/%a0-%a1,-(%a7) |save regs used by sintd0 - movel L_SCR1(%a6),-(%a7) - movel L_SCR2(%a6),-(%a7) - leal FP_SCR2(%a6),%a0 |a0 is ptr to F_SCR2(a6) - fmovex %fp0,(%a0) |move Y to memory at FP_SCR2(a6) - tstl L_SCR2(%a6) |test sign of original operand - bges do_fint |if pos, use Y - orl #0x80000000,(%a0) |if neg, use -Y -do_fint: - movel USER_FPSR(%a6),-(%a7) - bsr sintdo |sint routine returns int in fp0 - moveb (%a7),USER_FPSR(%a6) - addl #4,%a7 - movel (%a7)+,L_SCR2(%a6) - movel (%a7)+,L_SCR1(%a6) - moveml (%a7)+,%d0-%d1/%a0-%a1 |restore regs used by sint - movel L_SCR2(%a6),FP_SCR2(%a6) |restore original exponent - movel L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR - - -| A13. Check for LEN digits. -| If the int operation results in more than LEN digits, -| or less than LEN -1 digits, adjust ILOG and repeat from -| A6. This test occurs only on the first pass. If the -| result is exactly 10^LEN, decrement ILOG and divide -| the mantissa by 10. The calculation of 10^LEN cannot -| be inexact, since all powers of ten up to 10^27 are exact -| in extended precision, so the use of a previous power-of-ten -| table will introduce no error. -| -| -| Register usage: -| Input/Output -| d0: FPCR with size set to ext/scratch final = 0 -| d2: x/x -| d3: x/scratch final = x -| d4: LEN/LEN adjusted -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG/ILOG adjusted -| d7: k-factor/Unchanged -| a0: pointer into memory for packed bcd string formation -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: int portion of Y/abs(YINT) adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: x/10^LEN -| F_SCR1:x/x -| F_SCR2:Y with original exponent/Unchanged -| L_SCR1:original USER_FPCR/Unchanged -| L_SCR2:first word of X packed/Unchanged - -A13_st: - swap %d5 |put ICTR in lower word of d5 - tstw %d5 |check if ICTR = 0 - bne not_zr |if non-zero, go to second test -| -| Compute 10^(LEN-1) -| - fmoves FONE,%fp2 |init fp2 to 1.0 - movel %d4,%d0 |put LEN in d0 - subql #1,%d0 |d0 = LEN -1 - clrl %d3 |clr table index -l_loop: - lsrl #1,%d0 |shift next bit into carry - bccs l_next |if zero, skip the mul - fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) -l_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if LEN is zero - bnes l_loop |if not, loop -| -| 10^LEN-1 is computed for this test and A14. If the input was -| denormalized, check only the case in which YINT > 10^LEN. 
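The first-pass A13 checks boil down to three comparisons against 10^(LEN-1) and 10^LEN. A sketch, using double purely for illustration (the FPSP does this in extended precision, where the powers of ten involved are exact):

#include <math.h>

/* A13 sketch: force abs(YINT) into [10^(LEN-1), 10^LEN); when ILOG
 * had to move, the caller repeats from A6 with the new ILOG. */
static double fit_to_len(double yint, int len, int *ilog, int *redo)
{
	double lo = pow(10.0, len - 1);
	double hi = lo * 10.0;
	double a = fabs(yint);

	*redo = 0;
	if (a < lo) {			/* too few digits */
		(*ilog)--;
		*redo = 1;
	} else if (a > hi) {		/* too many digits */
		(*ilog)++;
		*redo = 1;
	} else if (a == hi) {		/* exactly 10^LEN */
		a /= 10.0;
		(*ilog)++;
	}
	return a;
}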
-| - tstb BINDEC_FLG(%a6) |check if input was norm - beqs A13_con |if norm, continue with checking - fabsx %fp0 |take abs of YINT - bra test_2 -| -| Compare abs(YINT) to 10^(LEN-1) and 10^LEN -| -A13_con: - fabsx %fp0 |take abs of YINT - fcmpx %fp2,%fp0 |compare abs(YINT) with 10^(LEN-1) - fbge test_2 |if greater, do next test - subql #1,%d6 |subtract 1 from ILOG - movew #1,%d5 |set ICTR - fmovel #rm_mode,%FPCR |set rmode to RM - fmuls FTEN,%fp2 |compute 10^LEN - bra A6_str |return to A6 and recompute YINT -test_2: - fmuls FTEN,%fp2 |compute 10^LEN - fcmpx %fp2,%fp0 |compare abs(YINT) with 10^LEN - fblt A14_st |if less, all is ok, go to A14 - fbgt fix_ex |if greater, fix and redo - fdivs FTEN,%fp0 |if equal, divide by 10 - addql #1,%d6 | and inc ILOG - bras A14_st | and continue elsewhere -fix_ex: - addql #1,%d6 |increment ILOG by 1 - movew #1,%d5 |set ICTR - fmovel #rm_mode,%FPCR |set rmode to RM - bra A6_str |return to A6 and recompute YINT -| -| Since ICTR <> 0, we have already been through one adjustment, -| and shouldn't have another; this is to check if abs(YINT) = 10^LEN -| 10^LEN is again computed using whatever table is in a1 since the -| value calculated cannot be inexact. -| -not_zr: - fmoves FONE,%fp2 |init fp2 to 1.0 - movel %d4,%d0 |put LEN in d0 - clrl %d3 |clr table index -z_loop: - lsrl #1,%d0 |shift next bit into carry - bccs z_next |if zero, skip the mul - fmulx (%a1,%d3),%fp2 |mul by 10**(d3_bit_no) -z_next: - addl #12,%d3 |inc d3 to next pwrten table entry - tstl %d0 |test if LEN is zero - bnes z_loop |if not, loop - fabsx %fp0 |get abs(YINT) - fcmpx %fp2,%fp0 |check if abs(YINT) = 10^LEN - fbne A14_st |if not, skip this - fdivs FTEN,%fp0 |divide abs(YINT) by 10 - addql #1,%d6 |and inc ILOG by 1 - addql #1,%d4 | and inc LEN - fmuls FTEN,%fp2 | if LEN++, the get 10^^LEN - - -| A14. Convert the mantissa to bcd. -| The binstr routine is used to convert the LEN digit -| mantissa to bcd in memory. The input to binstr is -| to be a fraction; i.e. (mantissa)/10^LEN and adjusted -| such that the decimal point is to the left of bit 63. -| The bcd digits are stored in the correct position in -| the final string area in memory. 
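The binstr routine named above (described in full with binstr.S below) extracts decimal digits from a binary fraction by repeated multiplication by 10, computed as x*8 + x*2 so that the bits overflowing the top of the word form the digit. The same idea in C, leaning on a compiler-provided 128-bit product and putting the radix point at the very top of the 64-bit word (a mild simplification of binstr's decimal-point-before-bit-63 convention):

#include <stdint.h>

/* sketch: each x10 step pushes the most significant decimal digit
 * out of the top of the word; digits come out msd first. */
static void frac_to_digits(uint64_t frac, int len, uint8_t *digits)
{
	for (int i = 0; i < len; i++) {
		unsigned __int128 p = (unsigned __int128)frac * 10;

		digits[i] = (uint8_t)(p >> 64);	/* overflowed bits */
		frac = (uint64_t)p;		/* remaining fraction */
	}
}

binstr itself additionally packs the digits two per byte, with a leading zero nibble, as it stores them.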
-| -| -| Register usage: -| Input/Output -| d0: x/LEN call to binstr - final is 0 -| d1: x/0 -| d2: x/ms 32-bits of mant of abs(YINT) -| d3: x/ls 32-bits of mant of abs(YINT) -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG -| d7: k-factor/Unchanged -| a0: pointer into memory for packed bcd string formation -| /ptr to first mantissa byte in result string -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: int portion of Y/abs(YINT) adjusted -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:x/Work area for final result -| F_SCR2:Y with original exponent/Unchanged -| L_SCR1:original USER_FPCR/Unchanged -| L_SCR2:first word of X packed/Unchanged - -A14_st: - fmovel #rz_mode,%FPCR |force rz for conversion - fdivx %fp2,%fp0 |divide abs(YINT) by 10^LEN - leal FP_SCR1(%a6),%a0 - fmovex %fp0,(%a0) |move abs(YINT)/10^LEN to memory - movel 4(%a0),%d2 |move 2nd word of FP_RES to d2 - movel 8(%a0),%d3 |move 3rd word of FP_RES to d3 - clrl 4(%a0) |zero word 2 of FP_RES - clrl 8(%a0) |zero word 3 of FP_RES - movel (%a0),%d0 |move exponent to d0 - swap %d0 |put exponent in lower word - beqs no_sft |if zero, don't shift - subil #0x3ffd,%d0 |sub bias less 2 to make fract - tstl %d0 |check if > 1 - bgts no_sft |if so, don't shift - negl %d0 |make exp positive -m_loop: - lsrl #1,%d2 |shift d2:d3 right, add 0s - roxrl #1,%d3 |the number of places - dbf %d0,m_loop |given in d0 -no_sft: - tstl %d2 |check for mantissa of zero - bnes no_zr |if not, go on - tstl %d3 |continue zero check - beqs zer_m |if zero, go directly to binstr -no_zr: - clrl %d1 |put zero in d1 for addx - addil #0x00000080,%d3 |inc at bit 7 - addxl %d1,%d2 |continue inc - andil #0xffffff80,%d3 |strip off lsb not used by 882 -zer_m: - movel %d4,%d0 |put LEN in d0 for binstr call - addql #3,%a0 |a0 points to M16 byte in result - bsr binstr |call binstr to convert mant - - -| A15. Convert the exponent to bcd. -| As in A14 above, the exp is converted to bcd and the -| digits are stored in the final string. -| -| Digits are stored in L_SCR1(a6) on return from BINDEC as: -| -| 32 16 15 0 -| ----------------------------------------- -| | 0 | e3 | e2 | e1 | e4 | X | X | X | -| ----------------------------------------- -| -| And are moved into their proper places in FP_SCR1. If digit e4 -| is non-zero, OPERR is signaled. In all cases, all 4 digits are -| written as specified in the 881/882 manual for packed decimal. 
-| -| Register usage: -| Input/Output -| d0: x/LEN call to binstr - final is 0 -| d1: x/scratch (0);shift count for final exponent packing -| d2: x/ms 32-bits of exp fraction/scratch -| d3: x/ls 32-bits of exp fraction -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG -| d7: k-factor/Unchanged -| a0: ptr to result string/ptr to L_SCR1(a6) -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: abs(YINT) adjusted/float(ILOG) -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:Work area for final result/BCD result -| F_SCR2:Y with original exponent/ILOG/10^4 -| L_SCR1:original USER_FPCR/Exponent digits on return from binstr -| L_SCR2:first word of X packed/Unchanged - -A15_st: - tstb BINDEC_FLG(%a6) |check for denorm - beqs not_denorm - ftstx %fp0 |test for zero - fbeq den_zero |if zero, use k-factor or 4933 - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG - bras convrt -den_zero: - tstl %d7 |check sign of the k-factor - blts use_ilog |if negative, use ILOG - fmoves F4933,%fp0 |force exponent to 4933 - bras convrt |do it -use_ilog: - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG - bras convrt -not_denorm: - ftstx %fp0 |test for zero - fbne not_zero |if zero, force exponent - fmoves FONE,%fp0 |force exponent to 1 - bras convrt |do it -not_zero: - fmovel %d6,%fp0 |float ILOG - fabsx %fp0 |get abs of ILOG -convrt: - fdivx 24(%a1),%fp0 |compute ILOG/10^4 - fmovex %fp0,FP_SCR2(%a6) |store fp0 in memory - movel 4(%a2),%d2 |move word 2 to d2 - movel 8(%a2),%d3 |move word 3 to d3 - movew (%a2),%d0 |move exp to d0 - beqs x_loop_fin |if zero, skip the shift - subiw #0x3ffd,%d0 |subtract off bias - negw %d0 |make exp positive -x_loop: - lsrl #1,%d2 |shift d2:d3 right - roxrl #1,%d3 |the number of places - dbf %d0,x_loop |given in d0 -x_loop_fin: - clrl %d1 |put zero in d1 for addx - addil #0x00000080,%d3 |inc at bit 6 - addxl %d1,%d2 |continue inc - andil #0xffffff80,%d3 |strip off lsb not used by 882 - movel #4,%d0 |put 4 in d0 for binstr call - leal L_SCR1(%a6),%a0 |a0 is ptr to L_SCR1 for exp digits - bsr binstr |call binstr to convert exp - movel L_SCR1(%a6),%d0 |load L_SCR1 lword to d0 - movel #12,%d1 |use d1 for shift count - lsrl %d1,%d0 |shift d0 right by 12 - bfins %d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1 - lsrl %d1,%d0 |shift d0 right by 12 - bfins %d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1 - tstb %d0 |check if e4 is zero - beqs A16_st |if zero, skip rest - orl #opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR - - -| A16. Write sign bits to final string. -| Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). 
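A16 itself is only two sign tests and a two-bit insert. In C terms (SM from the mantissa sign into bit 7 of the first result byte, SE from the sign of ILOG into bit 6, matching the packed-decimal format):

/* A16 sketch: write SM:SE into the top two bits of the packed
 * result; the rest of the first nibble is cleared first. */
static void write_sign_bits(unsigned char *result, int mant_neg, int exp_neg)
{
	unsigned bits = (mant_neg ? 2u : 0u) | (exp_neg ? 1u : 0u);

	result[0] = (result[0] & 0x0f) | (bits << 6);
}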
-| -| Register usage: -| Input/Output -| d0: x/scratch - final is x -| d2: x/x -| d3: x/x -| d4: LEN/Unchanged -| d5: ICTR:LAMBDA/LAMBDA:ICTR -| d6: ILOG/ILOG adjusted -| d7: k-factor/Unchanged -| a0: ptr to L_SCR1(a6)/Unchanged -| a1: ptr to PTENxx array/Unchanged -| a2: ptr to FP_SCR2(a6)/Unchanged -| fp0: float(ILOG)/Unchanged -| fp1: 10^ISCALE/Unchanged -| fp2: 10^LEN/Unchanged -| F_SCR1:BCD result with correct signs -| F_SCR2:ILOG/10^4 -| L_SCR1:Exponent digits on return from binstr -| L_SCR2:first word of X packed/Unchanged - -A16_st: - clrl %d0 |clr d0 for collection of signs - andib #0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1 - tstl L_SCR2(%a6) |check sign of original mantissa - bges mant_p |if pos, don't set SM - moveql #2,%d0 |move 2 in to d0 for SM -mant_p: - tstl %d6 |check sign of ILOG - bges wr_sgn |if pos, don't set SE - addql #1,%d0 |set bit 0 in d0 for SE -wr_sgn: - bfins %d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1 - -| Clean up and restore all registers used. - - fmovel #0,%FPSR |clear possible inex2/ainex bits - fmovemx (%a7)+,%fp0-%fp2 - moveml (%a7)+,%d2-%d7/%a2 - rts - - |end diff --git a/arch/m68k/fpsp040/binstr.S b/arch/m68k/fpsp040/binstr.S deleted file mode 100644 index 8a05ba92a8a0f1d493c447044c4d3ef4c7f72c97..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/binstr.S +++ /dev/null @@ -1,139 +0,0 @@ -| -| binstr.sa 3.3 12/19/90 -| -| -| Description: Converts a 64-bit binary integer to bcd. -| -| Input: 64-bit binary integer in d2:d3, desired length (LEN) in -| d0, and a pointer to start in memory for bcd characters -| in d0. (This pointer must point to byte 4 of the first -| lword of the packed decimal memory string.) -| -| Output: LEN bcd digits representing the 64-bit integer. -| -| Algorithm: -| The 64-bit binary is assumed to have a decimal point before -| bit 63. The fraction is multiplied by 10 using a mul by 2 -| shift and a mul by 8 shift. The bits shifted out of the -| msb form a decimal digit. This process is iterated until -| LEN digits are formed. -| -| A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the -| digit formed will be assumed the least significant. This is -| to force the first byte formed to have a 0 in the upper 4 bits. -| -| A2. Beginning of the loop: -| Copy the fraction in d2:d3 to d4:d5. -| -| A3. Multiply the fraction in d2:d3 by 8 using bit-field -| extracts and shifts. The three msbs from d2 will go into -| d1. -| -| A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb -| will be collected by the carry. -| -| A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 -| into d2:d3. D1 will contain the bcd digit formed. -| -| A6. Test d7. If zero, the digit formed is the ms digit. If non- -| zero, it is the ls digit. Put the digit in its place in the -| upper word of d0. If it is the ls digit, write the word -| from d0 to memory. -| -| A7. Decrement d6 (LEN counter) and repeat the loop until zero. -| -| Implementation Notes: -| -| The registers are used as follows: -| -| d0: LEN counter -| d1: temp used to form the digit -| d2: upper 32-bits of fraction for mul by 8 -| d3: lower 32-bits of fraction for mul by 8 -| d4: upper 32-bits of fraction for mul by 2 -| d5: lower 32-bits of fraction for mul by 2 -| d6: temp for bit-field extracts -| d7: byte digit formation word;digit count {0,1} -| a0: pointer into memory for packed bcd string formation -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BINSTR idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .global binstr -binstr: - moveml %d0-%d7,-(%a7) -| -| A1: Init d7 -| - moveql #1,%d7 |init d7 for second digit - subql #1,%d0 |for dbf d0 would have LEN+1 passes -| -| A2. Copy d2:d3 to d4:d5. Start loop. -| -loop: - movel %d2,%d4 |copy the fraction before muls - movel %d3,%d5 |to d4:d5 -| -| A3. Multiply d2:d3 by 8; extract msbs into d1. -| - bfextu %d2{#0:#3},%d1 |copy 3 msbs of d2 into d1 - asll #3,%d2 |shift d2 left by 3 places - bfextu %d3{#0:#3},%d6 |copy 3 msbs of d3 into d6 - asll #3,%d3 |shift d3 left by 3 places - orl %d6,%d2 |or in msbs from d3 into d2 -| -| A4. Multiply d4:d5 by 2; add carry out to d1. -| - asll #1,%d5 |mul d5 by 2 - roxll #1,%d4 |mul d4 by 2 - swap %d6 |put 0 in d6 lower word - addxw %d6,%d1 |add in extend from mul by 2 -| -| A5. Add mul by 8 to mul by 2. D1 contains the digit formed. -| - addl %d5,%d3 |add lower 32 bits - nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) - addxl %d4,%d2 |add with extend upper 32 bits - nop |ERRATA ; FIX #13 (Rev. 1.2 6/6/90) - addxw %d6,%d1 |add in extend from add to d1 - swap %d6 |with d6 = 0; put 0 in upper word -| -| A6. Test d7 and branch. -| - tstw %d7 |if zero, store digit & to loop - beqs first_d |if non-zero, form byte & write -sec_d: - swap %d7 |bring first digit to word d7b - aslw #4,%d7 |first digit in upper 4 bits d7b - addw %d1,%d7 |add in ls digit to d7b - moveb %d7,(%a0)+ |store d7b byte in memory - swap %d7 |put LEN counter in word d7a - clrw %d7 |set d7a to signal no digits done - dbf %d0,loop |do loop some more! - bras end_bstr |finished, so exit -first_d: - swap %d7 |put digit word in d7b - movew %d1,%d7 |put new digit in d7b - swap %d7 |put LEN counter in word d7a - addqw #1,%d7 |set d7a to signal first digit done - dbf %d0,loop |do loop some more! - swap %d7 |put last digit in string - lslw #4,%d7 |move it to upper 4 bits - moveb %d7,(%a0)+ |store it in memory string -| -| Clean up and return with result in fp0. -| -end_bstr: - moveml (%a7)+,%d0-%d7 - rts - |end diff --git a/arch/m68k/fpsp040/bugfix.S b/arch/m68k/fpsp040/bugfix.S deleted file mode 100644 index 3bb9c84bb0582d1d012ff40be4ef7bc6d02f57d0..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/bugfix.S +++ /dev/null @@ -1,495 +0,0 @@ -| -| bugfix.sa 3.2 1/31/91 -| -| -| This file contains workarounds for bugs in the 040 -| relating to the Floating-Point Software Package (FPSP) -| -| Fixes for bugs: 1238 -| -| Bug: 1238 -| -| -| /* The following dirty_bit clear should be left in -| * the handler permanently to improve throughput. -| * The dirty_bits are located at bits [23:16] in -| * longword $08 in the busy frame $4x60. Bit 16 -| * corresponds to FP0, bit 17 corresponds to FP1, -| * and so on. -| */ -| if (E3_exception_just_serviced) { -| dirty_bit[cmdreg3b[9:7]] = 0; -| } -| -| if (fsave_format_version != $40) {goto NOFIX} -| -| if !(E3_exception_just_serviced) {goto NOFIX} -| if (cupc == 0000000) {goto NOFIX} -| if ((cmdreg1b[15:13] != 000) && -| (cmdreg1b[15:10] != 010001)) {goto NOFIX} -| if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) && -| (cmdreg1b[12:10] != cmdreg3b[9:7])) ) && -| ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) && -| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX} -| -| /* Note: for 6d43b or 8d43b, you may want to add the following code -| * to get better coverage. 
(If you do not insert this code, the part -| * won't lock up; it will simply get the wrong answer.) -| * Do NOT insert this code for 10d43b or later parts. -| * -| * if (fpiarcu == integer stack return address) { -| * cupc = 0000000; -| * goto NOFIX; -| * } -| */ -| -| if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2} -| FIX_OPCLASS0: -| if (((cmdreg1b[12:10] == cmdreg2b[9:7]) || -| (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) && -| (cmdreg1b[12:10] != cmdreg3b[9:7]) && -| (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */ -| /* We execute the following code if there is an -| xu conflict and NOT an nu conflict */ -| -| /* first save some values on the fsave frame */ -| stag_temp = STAG[fsave_frame]; -| cmdreg1b_temp = CMDREG1B[fsave_frame]; -| dtag_temp = DTAG[fsave_frame]; -| ete15_temp = ETE15[fsave_frame]; -| -| CUPC[fsave_frame] = 0000000; -| FRESTORE -| FSAVE -| -| /* If the xu instruction is exceptional, we punt. -| * Otherwise, we would have to include OVFL/UNFL handler -| * code here to get the correct answer. -| */ -| if (fsave_frame_format == $4060) {goto KILL_PROCESS} -| -| fsave_frame = /* build a long frame of all zeros */ -| fsave_frame_format = $4060; /* label it as long frame */ -| -| /* load it with the temps we saved */ -| STAG[fsave_frame] = stag_temp; -| CMDREG1B[fsave_frame] = cmdreg1b_temp; -| DTAG[fsave_frame] = dtag_temp; -| ETE15[fsave_frame] = ete15_temp; -| -| /* Make sure that the cmdreg3b dest reg is not going to -| * be destroyed by a FMOVEM at the end of all this code. -| * If it is, you should move the current value of the reg -| * onto the stack so that the reg will loaded with that value. -| */ -| -| /* All done. Proceed with the code below */ -| } -| -| etemp = FP_reg_[cmdreg1b[12:10]]; -| ete15 = ~ete14; -| cmdreg1b[15:10] = 010010; -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| -| -| FIX_OPCLASS2: -| if ((cmdreg1b[9:7] == cmdreg2b[9:7]) && -| (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */ -| /* We execute the following code if there is an -| xu conflict and NOT an nu conflict */ -| -| /* first save some values on the fsave frame */ -| stag_temp = STAG[fsave_frame]; -| cmdreg1b_temp = CMDREG1B[fsave_frame]; -| dtag_temp = DTAG[fsave_frame]; -| ete15_temp = ETE15[fsave_frame]; -| etemp_temp = ETEMP[fsave_frame]; -| -| CUPC[fsave_frame] = 0000000; -| FRESTORE -| FSAVE -| -| -| /* If the xu instruction is exceptional, we punt. -| * Otherwise, we would have to include OVFL/UNFL handler -| * code here to get the correct answer. -| */ -| if (fsave_frame_format == $4060) {goto KILL_PROCESS} -| -| fsave_frame = /* build a long frame of all zeros */ -| fsave_frame_format = $4060; /* label it as long frame */ -| -| /* load it with the temps we saved */ -| STAG[fsave_frame] = stag_temp; -| CMDREG1B[fsave_frame] = cmdreg1b_temp; -| DTAG[fsave_frame] = dtag_temp; -| ETE15[fsave_frame] = ete15_temp; -| ETEMP[fsave_frame] = etemp_temp; -| -| /* Make sure that the cmdreg3b dest reg is not going to -| * be destroyed by a FMOVEM at the end of all this code. -| * If it is, you should move the current value of the reg -| * onto the stack so that the reg will loaded with that value. -| */ -| -| /* All done. Proceed with the code below */ -| } -| -| if (etemp_exponent == min_sgl) etemp_exponent = min_dbl; -| if (etemp_exponent == max_sgl) etemp_exponent = max_dbl; -| cmdreg1b[15:10] = 010101; -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| -| -| NOFIX: -| clear(bug_flag_procIDxxxx); -| FRESTORE and return; -| - - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|BUGFIX idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref fpsp_fmt_error - - .global b1238_fix -b1238_fix: -| -| This code is entered only on completion of the handling of an -| nu-generated ovfl, unfl, or inex exception. If the version -| number of the fsave is not $40, this handler is not necessary. -| Simply branch to fix_done and exit normally. -| - cmpib #VER_40,4(%a7) - bne fix_done -| -| Test for cu_savepc equal to zero. If not, this is not a bug -| #1238 case. -| - moveb CU_SAVEPC(%a6),%d0 - andib #0xFE,%d0 - beq fix_done |if zero, this is not bug #1238 - -| -| Test the register conflict aspect. If opclass0, check for -| cu src equal to xu dest or equal to nu dest. If so, go to -| op0. Else, or if opclass2, check for cu dest equal to -| xu dest or equal to nu dest. If so, go to tst_opcl. Else, -| exit, it is not the bug case. -| -| Check for opclass 0. If not, go and check for opclass 2 and sgl. -| - movew CMDREG1B(%a6),%d0 - andiw #0xE000,%d0 |strip all but opclass - bne op2sgl |not opclass 0, check op2 -| -| Check for cu and nu register conflict. If one exists, this takes -| priority over a cu and xu conflict. -| - bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src - bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest - cmpb %d0,%d1 - beqs op0 |if equal, continue bugfix -| -| Check for cu dest equal to nu dest. If so, go and fix the -| bug condition. Otherwise, exit. -| - bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest - cmpb %d0,%d1 |cmp 1st dest with 3rd dest - beqs op0 |if equal, continue bugfix -| -| Check for cu and xu register conflict. -| - bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest - cmpb %d0,%d1 |cmp 1st dest with 2nd dest - beqs op0_xu |if equal, continue bugfix - bfextu CMDREG1B(%a6){#3:#3},%d0 |get 1st src - cmpb %d0,%d1 |cmp 1st src with 2nd dest - beq op0_xu - bne fix_done |if the reg checks fail, exit -| -| We have the opclass 0 situation. -| -op0: - bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx %d0,ETEMP(%a6) |load source to ETEMP - - moveb #0x12,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended -| -| Set ETEMP exponent bit 15 as the opposite of ete14 -| - btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 - beq setete15 - bclr #etemp15_bit,STAG(%a6) - bra finish -setete15: - bset #etemp15_bit,STAG(%a6) - bra finish - -| -| We have the case in which a conflict exists between the cu src or -| dest and the dest of the xu. We must clear the instruction in -| the cu and restore the state, allowing the instruction in the -| xu to complete. Remember, the instruction in the nu -| was exceptional, and was completed by the appropriate handler. -| If the result of the xu instruction is not exceptional, we can -| restore the instruction from the cu to the frame and continue -| processing the original exception. If the result is also -| exceptional, we choose to kill the process. -| -| Items saved from the stack: -| -| $3c stag - L_SCR1 -| $40 cmdreg1b - L_SCR2 -| $44 dtag - L_SCR3 -| -| The cu savepc is set to zero, and the frame is restored to the -| fpu. 
-| -op0_xu: - movel STAG(%a6),L_SCR1(%a6) - movel CMDREG1B(%a6),L_SCR2(%a6) - movel DTAG(%a6),L_SCR3(%a6) - andil #0xe0000000,L_SCR3(%a6) - moveb #0,CU_SAVEPC(%a6) - movel (%a7)+,%d1 |save return address from bsr - frestore (%a7)+ - fsave -(%a7) -| -| Check if the instruction which just completed was exceptional. -| - cmpw #0x4060,(%a7) - beq op0_xb -| -| It is necessary to isolate the result of the instruction in the -| xu if it is to fp0 - fp3 and write that value to the USER_FPn -| locations on the stack. The correct destination register is in -| cmdreg2b. -| - bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no - cmpil #3,%d0 - bgts op0_xi - beqs op0_fp3 - cmpil #1,%d0 - blts op0_fp0 - beqs op0_fp1 -op0_fp2: - fmovemx %fp2-%fp2,USER_FP2(%a6) - bras op0_xi -op0_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - bras op0_xi -op0_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - bras op0_xi -op0_fp3: - fmovemx %fp3-%fp3,USER_FP3(%a6) -| -| The frame returned is idle. We must build a busy frame to hold -| the cu state information and setup etemp. -| -op0_xi: - movel #22,%d0 |clear 23 lwords - clrl (%a7) -op0_loop: - clrl -(%a7) - dbf %d0,op0_loop - movel #0x40600000,-(%a7) - movel L_SCR1(%a6),STAG(%a6) - movel L_SCR2(%a6),CMDREG1B(%a6) - movel L_SCR3(%a6),DTAG(%a6) - moveb #0x6,CU_SAVEPC(%a6) - movel %d1,-(%a7) |return bsr return address - bfextu CMDREG1B(%a6){#3:#3},%d0 |get source register no - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx %d0,ETEMP(%a6) |load source to ETEMP - - moveb #0x12,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, extended -| -| Set ETEMP exponent bit 15 as the opposite of ete14 -| - btst #6,ETEMP_EX(%a6) |check etemp exponent bit 14 - beq op0_sete15 - bclr #etemp15_bit,STAG(%a6) - bra finish -op0_sete15: - bset #etemp15_bit,STAG(%a6) - bra finish - -| -| The frame returned is busy. It is not possible to reconstruct -| the code sequence to allow completion. We will jump to -| fpsp_fmt_error and allow the kernel to kill the process. -| -op0_xb: - jmp fpsp_fmt_error - -| -| Check for opclass 2 and single size. If not both, exit. -| -op2sgl: - movew CMDREG1B(%a6),%d0 - andiw #0xFC00,%d0 |strip all but opclass and size - cmpiw #0x4400,%d0 |test for opclass 2 and size=sgl - bne fix_done |if not, it is not bug 1238 -| -| Check for cu dest equal to nu dest or equal to xu dest, with -| a cu and nu conflict taking priority an nu conflict. If either, -| go and fix the bug condition. Otherwise, exit. -| - bfextu CMDREG1B(%a6){#6:#3},%d0 |get 1st dest - bfextu CMDREG3B(%a6){#6:#3},%d1 |get 3rd dest - cmpb %d0,%d1 |cmp 1st dest with 3rd dest - beq op2_com |if equal, continue bugfix - bfextu CMDREG2B(%a6){#6:#3},%d1 |get 2nd dest - cmpb %d0,%d1 |cmp 1st dest with 2nd dest - bne fix_done |if the reg checks fail, exit -| -| We have the case in which a conflict exists between the cu src or -| dest and the dest of the xu. We must clear the instruction in -| the cu and restore the state, allowing the instruction in the -| xu to complete. Remember, the instruction in the nu -| was exceptional, and was completed by the appropriate handler. -| If the result of the xu instruction is not exceptional, we can -| restore the instruction from the cu to the frame and continue -| processing the original exception. If the result is also -| exceptional, we choose to kill the process. -| -| Items saved from the stack: -| -| $3c stag - L_SCR1 -| $40 cmdreg1b - L_SCR2 -| $44 dtag - L_SCR3 -| etemp - FP_SCR2 -| -| The cu savepc is set to zero, and the frame is restored to the -| fpu. 
-| -op2_xu: - movel STAG(%a6),L_SCR1(%a6) - movel CMDREG1B(%a6),L_SCR2(%a6) - movel DTAG(%a6),L_SCR3(%a6) - andil #0xe0000000,L_SCR3(%a6) - moveb #0,CU_SAVEPC(%a6) - movel ETEMP(%a6),FP_SCR2(%a6) - movel ETEMP_HI(%a6),FP_SCR2+4(%a6) - movel ETEMP_LO(%a6),FP_SCR2+8(%a6) - movel (%a7)+,%d1 |save return address from bsr - frestore (%a7)+ - fsave -(%a7) -| -| Check if the instruction which just completed was exceptional. -| - cmpw #0x4060,(%a7) - beq op2_xb -| -| It is necessary to isolate the result of the instruction in the -| xu if it is to fp0 - fp3 and write that value to the USER_FPn -| locations on the stack. The correct destination register is in -| cmdreg2b. -| - bfextu CMDREG2B(%a6){#6:#3},%d0 |get dest register no - cmpil #3,%d0 - bgts op2_xi - beqs op2_fp3 - cmpil #1,%d0 - blts op2_fp0 - beqs op2_fp1 -op2_fp2: - fmovemx %fp2-%fp2,USER_FP2(%a6) - bras op2_xi -op2_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - bras op2_xi -op2_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - bras op2_xi -op2_fp3: - fmovemx %fp3-%fp3,USER_FP3(%a6) -| -| The frame returned is idle. We must build a busy frame to hold -| the cu state information and fix up etemp. -| -op2_xi: - movel #22,%d0 |clear 23 lwords - clrl (%a7) -op2_loop: - clrl -(%a7) - dbf %d0,op2_loop - movel #0x40600000,-(%a7) - movel L_SCR1(%a6),STAG(%a6) - movel L_SCR2(%a6),CMDREG1B(%a6) - movel L_SCR3(%a6),DTAG(%a6) - moveb #0x6,CU_SAVEPC(%a6) - movel FP_SCR2(%a6),ETEMP(%a6) - movel FP_SCR2+4(%a6),ETEMP_HI(%a6) - movel FP_SCR2+8(%a6),ETEMP_LO(%a6) - movel %d1,-(%a7) - bra op2_com - -| -| We have the opclass 2 single source situation. -| -op2_com: - moveb #0x15,%d0 - bfins %d0,CMDREG1B(%a6){#0:#6} |opclass 2, double - - cmpw #0x407F,ETEMP_EX(%a6) |single +max - bnes case2 - movew #0x43FF,ETEMP_EX(%a6) |to double +max - bra finish -case2: - cmpw #0xC07F,ETEMP_EX(%a6) |single -max - bnes case3 - movew #0xC3FF,ETEMP_EX(%a6) |to double -max - bra finish -case3: - cmpw #0x3F80,ETEMP_EX(%a6) |single +min - bnes case4 - movew #0x3C00,ETEMP_EX(%a6) |to double +min - bra finish -case4: - cmpw #0xBF80,ETEMP_EX(%a6) |single -min - bne fix_done - movew #0xBC00,ETEMP_EX(%a6) |to double -min - bra finish -| -| The frame returned is busy. It is not possible to reconstruct -| the code sequence to allow completion. fpsp_fmt_error causes -| an fline illegal instruction to be executed. -| -| You should replace the jump to fpsp_fmt_error with a jump -| to the entry point used to kill a process. -| -op2_xb: - jmp fpsp_fmt_error - -| -| Enter here if the case is not of the situations affected by -| bug #1238, or if the fix is completed, and exit. -| -finish: -fix_done: - rts - - |end diff --git a/arch/m68k/fpsp040/decbin.S b/arch/m68k/fpsp040/decbin.S deleted file mode 100644 index 16ed796bad87f375b98e21e5804aa375c4648858..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/decbin.S +++ /dev/null @@ -1,505 +0,0 @@ -| -| decbin.sa 3.3 12/19/90 -| -| Description: Converts normalized packed bcd value pointed to by -| register A6 to extended-precision value in FP0. -| -| Input: Normalized packed bcd value in ETEMP(a6). -| -| Output: Exact floating-point representation of the packed bcd value. -| -| Saves and Modifies: D2-D5 -| -| Speed: The program decbin takes ??? cycles to execute. -| -| Object Size: -| -| External Reference(s): None. -| -| Algorithm: -| Expected is a normal bcd (i.e. non-exceptional; all inf, zero, -| and NaN operands are dispatched without entering this routine) -| value in 68881/882 format at location ETEMP(A6). -| -| A1. 
Convert the bcd exponent to binary by successive adds and muls. -| Set the sign according to SE. Subtract 16 to compensate -| for the mantissa which is to be interpreted as 17 integer -| digits, rather than 1 integer and 16 fraction digits. -| Note: this operation can never overflow. -| -| A2. Convert the bcd mantissa to binary by successive -| adds and muls in FP0. Set the sign according to SM. -| The mantissa digits will be converted with the decimal point -| assumed following the least-significant digit. -| Note: this operation can never overflow. -| -| A3. Count the number of leading/trailing zeros in the -| bcd string. If SE is positive, count the leading zeros; -| if negative, count the trailing zeros. Set the adjusted -| exponent equal to the exponent from A1 and the zero count -| added if SM = 1 and subtracted if SM = 0. Scale the -| mantissa the equivalent of forcing in the bcd value: -| -| SM = 0 a non-zero digit in the integer position -| SM = 1 a non-zero digit in Mant0, lsd of the fraction -| -| this will insure that any value, regardless of its -| representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted -| consistently. -| -| A4. Calculate the factor 10^exp in FP1 using a table of -| 10^(2^n) values. To reduce the error in forming factors -| greater than 10^27, a directed rounding scheme is used with -| tables rounded to RN, RM, and RP, according to the table -| in the comments of the pwrten section. -| -| A5. Form the final binary number by scaling the mantissa by -| the exponent factor. This is done by multiplying the -| mantissa in FP0 by the factor in FP1 if the adjusted -| exponent sign is positive, and dividing FP0 by FP1 if -| it is negative. -| -| Clean up and return. Check if the final mul or div resulted -| in an inex2 exception. If so, set inex1 in the fpsr and -| check if the inex1 exception is enabled. If so, set d7 upper -| word to $0100. This will signal unimp.sa that an enabled inex1 -| exception occurred. Unimp will fix the stack. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|DECBIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -| -| PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded -| to nearest, minus, and plus, respectively. The tables include -| 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding -| is required until the power is greater than 27, however, all -| tables include the first 5 for ease of indexing. -| - |xref PTENRN - |xref PTENRM - |xref PTENRP - -RTABLE: .byte 0,0,0,0 - .byte 2,3,2,3 - .byte 2,3,3,2 - .byte 3,2,2,3 - - .global decbin - .global calc_e - .global pwrten - .global calc_m - .global norm - .global ap_st_z - .global ap_st_n -| - .set FNIBS,7 - .set FSTRT,0 -| - .set ESTRT,4 - .set EDIGITS,2 | -| -| Constants in single precision -FZERO: .long 0x00000000 -FONE: .long 0x3F800000 -FTEN: .long 0x41200000 - - .set TEN,10 - -| -decbin: - | fmovel #0,FPCR ;clr real fpcr - moveml %d2-%d5,-(%a7) -| -| Calculate exponent: -| 1. Copy bcd value in memory for use as a working copy. -| 2. Calculate absolute value of exponent in d1 by mul and add. -| 3. Correct for exponent sign. -| 4. Subtract 16 to compensate for interpreting the mant as all integer digits. -| (i.e., all digits assumed left of the decimal point.) 
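Steps 1 through 4 of the exponent calculation condense to a few lines of C (a sketch; nibble positions follow the 68881/882 packed-decimal layout used above):

#include <stdint.h>

/* calc_e sketch: three BCD exponent digits sit in nibbles 6..4 of
 * the first long word, most significant first; bit 30 is SE. */
static int bcd_exponent(uint32_t word0)
{
	int e = 0;

	for (int shift = 24; shift >= 16; shift -= 4)
		e = e * 10 + (int)((word0 >> shift) & 0xf);
	if (word0 & 0x40000000)		/* SE: exponent sign */
		e = -e;
	return e - 16;	/* mantissa treated as 17 integer digits */
}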
-| -| Register usage: -| -| calc_e: -| (*) d0: temp digit storage -| (*) d1: accumulator for binary exponent -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: first word of bcd -| ( ) a0: pointer to working bcd value -| ( ) a6: pointer to original bcd value -| (*) FP_SCR1: working copy of original bcd value -| (*) L_SCR1: copy of original exponent word -| -calc_e: - movel #EDIGITS,%d2 |# of nibbles (digits) in fraction part - moveql #ESTRT,%d3 |counter to pick up digits - leal FP_SCR1(%a6),%a0 |load tmp bcd storage address - movel ETEMP(%a6),(%a0) |save input bcd value - movel ETEMP_HI(%a6),4(%a0) |save words 2 and 3 - movel ETEMP_LO(%a6),8(%a0) |and work with these - movel (%a0),%d4 |get first word of bcd - clrl %d1 |zero d1 for accumulator -e_gd: - mulul #TEN,%d1 |mul partial product by one digit place - bfextu %d4{%d3:#4},%d0 |get the digit and zero extend into d0 - addl %d0,%d1 |d1 = d1 + d0 - addqb #4,%d3 |advance d3 to the next digit - dbf %d2,e_gd |if we have used all 3 digits, exit loop - btst #30,%d4 |get SE - beqs e_pos |don't negate if pos - negl %d1 |negate before subtracting -e_pos: - subl #16,%d1 |sub to compensate for shift of mant - bges e_save |if still pos, do not neg - negl %d1 |now negative, make pos and set SE - orl #0x40000000,%d4 |set SE in d4, - orl #0x40000000,(%a0) |and in working bcd -e_save: - movel %d1,L_SCR1(%a6) |save exp in memory -| -| -| Calculate mantissa: -| 1. Calculate absolute value of mantissa in fp0 by mul and add. -| 2. Correct for mantissa sign. -| (i.e., all digits assumed left of the decimal point.) -| -| Register usage: -| -| calc_m: -| (*) d0: temp digit storage -| (*) d1: lword counter -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: words 2 and 3 of bcd -| ( ) a0: pointer to working bcd value -| ( ) a6: pointer to original bcd value -| (*) fp0: mantissa accumulator -| ( ) FP_SCR1: working copy of original bcd value -| ( ) L_SCR1: copy of original exponent word -| -calc_m: - moveql #1,%d1 |word counter, init to 1 - fmoves FZERO,%fp0 |accumulator -| -| -| Since the packed number has a long word between the first & second parts, -| get the integer digit then skip down & get the rest of the -| mantissa. We will unroll the loop once. -| - bfextu (%a0){#28:#4},%d0 |integer part is ls digit in long word - faddb %d0,%fp0 |add digit to sum in fp0 -| -| -| Get the rest of the mantissa. -| -loadlw: - movel (%a0,%d1.L*4),%d4 |load mantissa longword into d4 - moveql #FSTRT,%d3 |counter to pick up digits - moveql #FNIBS,%d2 |reset number of digits per a0 ptr -md2b: - fmuls FTEN,%fp0 |fp0 = fp0 * 10 - bfextu %d4{%d3:#4},%d0 |get the digit and zero extend - faddb %d0,%fp0 |fp0 = fp0 + digit -| -| -| If all the digits (8) in that long word have been converted (d2=0), -| then inc d1 (=2) to point to the next long word and reset d3 to 0 -| to initialize the digit offset, and set d2 to 7 for the digit count; -| else continue with this long word. -| - addqb #4,%d3 |advance d3 to the next digit - dbf %d2,md2b |check for last digit in this lw -nextlw: - addql #1,%d1 |inc lw pointer in mantissa - cmpl #2,%d1 |test for last lw - ble loadlw |if not, get last one - -| -| Check the sign of the mant and make the value in fp0 the same sign. 
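With the m_sign step folded in, calc_m is a most-significant-digit-first accumulation of the 17 mantissa digits. A sketch (long double stands in for the 68881 extended format; the accumulation is exact wherever long double carries a 64-bit significand, since 17 decimal digits fit in 57 bits):

#include <stdint.h>

/* calc_m sketch: one integer digit in the low nibble of word 0,
 * sixteen fraction digits msd-first in words 1 and 2; bit 31 is SM. */
static long double bcd_mantissa(const uint32_t w[3])
{
	long double m = (long double)(w[0] & 0xf);

	for (int lw = 1; lw <= 2; lw++)
		for (int shift = 28; shift >= 0; shift -= 4)
			m = m * 10.0L + (long double)((w[lw] >> shift) & 0xf);
	return (w[0] & 0x80000000) ? -m : m;
}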
-| -m_sign: - btst #31,(%a0) |test sign of the mantissa - beq ap_st_z |if clear, go to append/strip zeros - fnegx %fp0 |if set, negate fp0 - -| -| Append/strip zeros: -| -| For adjusted exponents which have an absolute value greater than 27*, -| this routine calculates the amount needed to normalize the mantissa -| for the adjusted exponent. That number is subtracted from the exp -| if the exp was positive, and added if it was negative. The purpose -| of this is to reduce the value of the exponent and the possibility -| of error in calculation of pwrten. -| -| 1. Branch on the sign of the adjusted exponent. -| 2p.(positive exp) -| 2. Check M16 and the digits in lwords 2 and 3 in descending order. -| 3. Add one for each zero encountered until a non-zero digit. -| 4. Subtract the count from the exp. -| 5. Check if the exp has crossed zero in #3 above; make the exp abs -| and set SE. -| 6. Multiply the mantissa by 10**count. -| 2n.(negative exp) -| 2. Check the digits in lwords 3 and 2 in descending order. -| 3. Add one for each zero encountered until a non-zero digit. -| 4. Add the count to the exp. -| 5. Check if the exp has crossed zero in #3 above; clear SE. -| 6. Divide the mantissa by 10**count. -| -| *Why 27? If the adjusted exponent is within -28 < expA < 28, than -| any adjustment due to append/strip zeros will drive the resultant -| exponent towards zero. Since all pwrten constants with a power -| of 27 or less are exact, there is no need to use this routine to -| attempt to lessen the resultant exponent. -| -| Register usage: -| -| ap_st_z: -| (*) d0: temp digit storage -| (*) d1: zero count -| (*) d2: digit count -| (*) d3: offset pointer -| ( ) d4: first word of bcd -| (*) d5: lword counter -| ( ) a0: pointer to working bcd value -| ( ) FP_SCR1: working copy of original bcd value -| ( ) L_SCR1: copy of original exponent word -| -| -| First check the absolute value of the exponent to see if this -| routine is necessary. If so, then check the sign of the exponent -| and do append (+) or strip (-) zeros accordingly. -| This section handles a positive adjusted exponent. -| -ap_st_z: - movel L_SCR1(%a6),%d1 |load expA for range test - cmpl #27,%d1 |test is with 27 - ble pwrten |if abs(expA) <28, skip ap/st zeros - btst #30,(%a0) |check sign of exp - bne ap_st_n |if neg, go to neg side - clrl %d1 |zero count reg - movel (%a0),%d4 |load lword 1 to d4 - bfextu %d4{#28:#4},%d0 |get M16 in d0 - bnes ap_p_fx |if M16 is non-zero, go fix exp - addql #1,%d1 |inc zero count - moveql #1,%d5 |init lword counter - movel (%a0,%d5.L*4),%d4 |get lword 2 to d4 - bnes ap_p_cl |if lw 2 is zero, skip it - addql #8,%d1 |and inc count by 8 - addql #1,%d5 |inc lword counter - movel (%a0,%d5.L*4),%d4 |get lword 3 to d4 -ap_p_cl: - clrl %d3 |init offset reg - moveql #7,%d2 |init digit counter -ap_p_gd: - bfextu %d4{%d3:#4},%d0 |get digit - bnes ap_p_fx |if non-zero, go to fix exp - addql #4,%d3 |point to next digit - addql #1,%d1 |inc digit counter - dbf %d2,ap_p_gd |get next digit -ap_p_fx: - movel %d1,%d0 |copy counter to d2 - movel L_SCR1(%a6),%d1 |get adjusted exp from memory - subl %d0,%d1 |subtract count from exp - bges ap_p_fm |if still pos, go to pwrten - negl %d1 |now its neg; get abs - movel (%a0),%d4 |load lword 1 to d4 - orl #0x40000000,%d4 | and set SE in d4 - orl #0x40000000,(%a0) | and in memory -| -| Calculate the mantissa multiplier to compensate for the striping of -| zeros from the mantissa. 
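For the positive-exponent side just described, the zero count is a plain msd-first scan over the digits (the negative-exponent side at ap_st_n scans from the least significant end instead). A sketch:

#include <stdint.h>

/* ap_st_z sketch: count leading zero digits, starting with the
 * integer digit M16, then msd-first through both fraction words. */
static int leading_zero_digits(const uint32_t w[3])
{
	int count;

	if (w[0] & 0xf)			/* integer digit non-zero */
		return 0;
	count = 1;
	for (int lw = 1; lw <= 2; lw++)
		for (int shift = 28; shift >= 0; shift -= 4) {
			if ((w[lw] >> shift) & 0xf)
				return count;
			count++;
		}
	return count;
}

The count is then subtracted from the adjusted exponent and compensated by scaling the mantissa by 10^count, exactly as ap_p_fx above and ap_p_fm below do.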
-| -ap_p_fm: - movel #PTENRN,%a1 |get address of power-of-ten table - clrl %d3 |init table index - fmoves FONE,%fp1 |init fp1 to 1 - moveql #3,%d2 |init d2 to count bits in counter -ap_p_el: - asrl #1,%d0 |shift lsb into carry - bccs ap_p_en |if 1, mul fp1 by pwrten factor - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -ap_p_en: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes ap_p_el |if not, get next bit - fmulx %fp1,%fp0 |mul mantissa by 10**(no_bits_shifted) - bra pwrten |go calc pwrten -| -| This section handles a negative adjusted exponent. -| -ap_st_n: - clrl %d1 |clr counter - moveql #2,%d5 |set up d5 to point to lword 3 - movel (%a0,%d5.L*4),%d4 |get lword 3 - bnes ap_n_cl |if not zero, check digits - subl #1,%d5 |dec d5 to point to lword 2 - addql #8,%d1 |inc counter by 8 - movel (%a0,%d5.L*4),%d4 |get lword 2 -ap_n_cl: - movel #28,%d3 |point to last digit - moveql #7,%d2 |init digit counter -ap_n_gd: - bfextu %d4{%d3:#4},%d0 |get digit - bnes ap_n_fx |if non-zero, go to exp fix - subql #4,%d3 |point to previous digit - addql #1,%d1 |inc digit counter - dbf %d2,ap_n_gd |get next digit -ap_n_fx: - movel %d1,%d0 |copy counter to d0 - movel L_SCR1(%a6),%d1 |get adjusted exp from memory - subl %d0,%d1 |subtract count from exp - bgts ap_n_fm |if still pos, go fix mantissa - negl %d1 |take abs of exp and clr SE - movel (%a0),%d4 |load lword 1 to d4 - andl #0xbfffffff,%d4 | and clr SE in d4 - andl #0xbfffffff,(%a0) | and in memory -| -| Calculate the mantissa multiplier to compensate for the appending of -| zeros to the mantissa. -| -ap_n_fm: - movel #PTENRN,%a1 |get address of power-of-ten table - clrl %d3 |init table index - fmoves FONE,%fp1 |init fp1 to 1 - moveql #3,%d2 |init d2 to count bits in counter -ap_n_el: - asrl #1,%d0 |shift lsb into carry - bccs ap_n_en |if 1, mul fp1 by pwrten factor - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -ap_n_en: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes ap_n_el |if not, get next bit - fdivx %fp1,%fp0 |div mantissa by 10**(no_bits_shifted) -| -| -| Calculate power-of-ten factor from adjusted and shifted exponent. 
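pwrten, documented next, first remaps the user's rounding mode so that the power-of-ten constant is rounded in the direction that compensates for the later multiply or divide. The table below reduces to a small rule, sketched here in C (sm and se are the sign bits as 0/1; the enum follows the FPCR rounding-mode encoding):

enum rmode { RN, RZ, RM, RP };

static enum rmode pwrten_mode(enum rmode user, int sm, int se)
{
	switch (user) {
	case RN: return RN;			/* RN is never remapped */
	case RP: return (sm ^ se) ? RM : RP;	/* flip when the signs differ */
	case RM: return (sm ^ se) ? RP : RM;
	default: return se ? RP : RM;		/* RZ: neg exp -> RP, pos -> RM */
	}
}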
-| -| Register usage: -| -| pwrten: -| (*) d0: temp -| ( ) d1: exponent -| (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp -| (*) d3: FPCR work copy -| ( ) d4: first word of bcd -| (*) a1: RTABLE pointer -| calc_p: -| (*) d0: temp -| ( ) d1: exponent -| (*) d3: PWRTxx table index -| ( ) a0: pointer to working copy of bcd -| (*) a1: PWRTxx pointer -| (*) fp1: power-of-ten accumulator -| -| Pwrten calculates the exponent factor in the selected rounding mode -| according to the following table: -| -| Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode -| -| ANY ANY RN RN -| -| + + RP RP -| - + RP RM -| + - RP RM -| - - RP RP -| -| + + RM RM -| - + RM RP -| + - RM RP -| - - RM RM -| -| + + RZ RM -| - + RZ RM -| + - RZ RP -| - - RZ RP -| -| -pwrten: - movel USER_FPCR(%a6),%d3 |get user's FPCR - bfextu %d3{#26:#2},%d2 |isolate rounding mode bits - movel (%a0),%d4 |reload 1st bcd word to d4 - asll #2,%d2 |format d2 to be - bfextu %d4{#0:#2},%d0 | {FPCR[6],FPCR[5],SM,SE} - addl %d0,%d2 |in d2 as index into RTABLE - leal RTABLE,%a1 |load rtable base - moveb (%a1,%d2),%d0 |load new rounding bits from table - clrl %d3 |clear d3 to force no exc and extended - bfins %d0,%d3{#26:#2} |stuff new rounding bits in FPCR - fmovel %d3,%FPCR |write new FPCR - asrl #1,%d0 |write correct PTENxx table - bccs not_rp |to a1 - leal PTENRP,%a1 |it is RP - bras calc_p |go to init section -not_rp: - asrl #1,%d0 |keep checking - bccs not_rm - leal PTENRM,%a1 |it is RM - bras calc_p |go to init section -not_rm: - leal PTENRN,%a1 |it is RN -calc_p: - movel %d1,%d0 |copy exp to d0;use d0 - bpls no_neg |if exp is negative, - negl %d0 |invert it - orl #0x40000000,(%a0) |and set SE bit -no_neg: - clrl %d3 |table index - fmoves FONE,%fp1 |init fp1 to 1 -e_loop: - asrl #1,%d0 |shift next bit into carry - bccs e_next |if zero, skip the mul - fmulx (%a1,%d3),%fp1 |mul by 10**(d3_bit_no) -e_next: - addl #12,%d3 |inc d3 to next rtable entry - tstl %d0 |check if d0 is zero - bnes e_loop |not zero, continue shifting -| -| -| Check the sign of the adjusted exp and make the value in fp0 the -| same sign. If the exp was pos then multiply fp1*fp0; -| else divide fp0/fp1. -| -| Register Usage: -| norm: -| ( ) a0: pointer to working bcd value -| (*) fp0: mantissa accumulator -| ( ) fp1: scaling factor - 10**(abs(exp)) -| -norm: - btst #30,(%a0) |test the sign of the exponent - beqs mul |if clear, go to multiply -div: - fdivx %fp1,%fp0 |exp is negative, so divide mant by exp - bras end_dec -mul: - fmulx %fp1,%fp0 |exp is positive, so multiply by exp -| -| -| Clean up and return with result in fp0. -| -| If the final mul/div in decbin incurred an inex exception, -| it will be inex2, but will be reported as inex1 by get_op. -| -end_dec: - fmovel %FPSR,%d0 |get status register - bclrl #inex2_bit+8,%d0 |test for inex2 and clear it - fmovel %d0,%FPSR |return status reg w/o inex2 - beqs no_exc |skip this if no exc - orl #inx1a_mask,USER_FPSR(%a6) |set inex1/ainex -no_exc: - moveml (%a7)+,%d2-%d5 - rts - |end diff --git a/arch/m68k/fpsp040/do_func.S b/arch/m68k/fpsp040/do_func.S deleted file mode 100644 index 3eff99a804138c39232ed25c05dc6ec7cbeb5b56..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/do_func.S +++ /dev/null @@ -1,558 +0,0 @@ -| -| do_func.sa 3.4 2/18/91 -| -| Do_func performs the unimplemented operation. The operation -| to be performed is determined from the lower 7 bits of the -| extension word (except in the case of fmovecr and fsincos). 
-| The opcode and tag bits form an index into a jump table in -| tbldo.sa. Cases of zero, infinity and NaN are handled in -| do_func by forcing the default result. Normalized and -| denormalized (there are no unnormalized numbers at this -| point) are passed onto the emulation code. -| -| CMDREG1B and STAG are extracted from the fsave frame -| and combined to form the table index. The function called -| will start with a0 pointing to the ETEMP operand. Dyadic -| functions can find FPTEMP at -12(a0). -| -| Called functions return their result in fp0. Sincos returns -| sin(x) in fp0 and cos(x) in fp1. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -DO_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref t_dz2 - |xref t_operr - |xref t_inx2 - |xref t_resdnrm - |xref dst_nan - |xref src_nan - |xref nrm_set - |xref sto_cos - - |xref tblpre - |xref slognp1,slogn,slog10,slog2 - |xref slognd,slog10d,slog2d - |xref smod,srem - |xref sscale - |xref smovcr - -PONE: .long 0x3fff0000,0x80000000,0x00000000 |+1 -MONE: .long 0xbfff0000,0x80000000,0x00000000 |-1 -PZERO: .long 0x00000000,0x00000000,0x00000000 |+0 -MZERO: .long 0x80000000,0x00000000,0x00000000 |-0 -PINF: .long 0x7fff0000,0x00000000,0x00000000 |+inf -MINF: .long 0xffff0000,0x00000000,0x00000000 |-inf -QNAN: .long 0x7fff0000,0xffffffff,0xffffffff |non-signaling nan -PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235 |+PI/2 -MPIBY2: .long 0xbFFF0000,0xC90FDAA2,0x2168C235 |-PI/2 - - .global do_func -do_func: - clrb CU_ONLY(%a6) -| -| Check for fmovecr. It does not follow the format of fp gen -| unimplemented instructions. The test is on the upper 6 bits; -| if they are $17, the inst is fmovecr. Call entry smovcr -| directly. -| - bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields - cmpil #0x17,%d0 |if op class and size fields are $17, -| ;it is FMOVECR; if not, continue - bnes not_fmovecr - jmp smovcr |fmovecr; jmp directly to emulation - -not_fmovecr: - movew CMDREG1B(%a6),%d0 - andl #0x7F,%d0 - cmpil #0x38,%d0 |if the extension is >= $38, - bge serror |it is illegal - bfextu STAG(%a6){#0:#3},%d1 - lsll #3,%d0 |make room for STAG - addl %d1,%d0 |combine for final index into table - leal tblpre,%a1 |start of monster jump table - movel (%a1,%d0.w*4),%a1 |real target address - leal ETEMP(%a6),%a0 |a0 is pointer to src op - movel USER_FPCR(%a6),%d1 - andl #0xFF,%d1 | discard all but rounding mode/prec - fmovel #0,%fpcr - jmp (%a1) -| -| ERROR -| - .global serror -serror: - st STORE_FLG(%a6) - rts -| -| These routines load forced values into fp0. They are called -| by index into tbldo. 
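The dispatch described at the top of this file (the low extension-word bits combined with the 3-bit source tag into an index into tbldo's jump table) looks like this in C. The handler names and the single shared row are illustrative stand-ins; the real table has one 8-slot row per operation:

#include <stdio.h>

typedef void (*fp_func)(void);
static void emulate(void)       { puts("norm/denorm: emulation code"); }
static void force_default(void) { puts("zero/inf/nan: forced default"); }

/* one 8-slot row, indexed by source tag:
 * norm=0, zero=1, inf=2, nan=3, denorm=4 (slots 5-7 are padding) */
static const fp_func row[8] = {
	emulate, force_default, force_default, force_default,
	emulate, force_default, force_default, force_default,
};

static void dispatch(unsigned short ext_word, unsigned int stag)
{
	unsigned int op = ext_word & 0x7f;	/* low 7 bits: the operation */
	if (op >= 0x38) {			/* illegal extension: serror */
		puts("serror");
		return;
	}
	/* the asm index is (op << 3) + stag into the full table */
	row[stag & 7]();
}

int main(void) { dispatch(0x0a, 0); return 0; }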
-| -| Load a signed zero to fp0 and set inex2/ainex -| - .global snzrinx -snzrinx: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bnes ld_mzinx |if negative, branch - bsr ld_pzero |bsr so we can return and set inx - bra t_inx2 |now, set the inx for the next inst -ld_mzinx: - bsr ld_mzero |if neg, load neg zero, return here - bra t_inx2 |now, set the inx for the next inst -| -| Load a signed zero to fp0; do not set inex2/ainex -| - .global szero -szero: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bne ld_mzero |if neg, load neg zero - bra ld_pzero |load positive zero -| -| Load a signed infinity to fp0; do not set inex2/ainex -| - .global sinf -sinf: - btstb #sign_bit,LOCAL_EX(%a0) |get sign of source operand - bne ld_minf |if negative branch - bra ld_pinf -| -| Load a signed one to fp0; do not set inex2/ainex -| - .global sone -sone: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mone - bra ld_pone -| -| Load a signed pi/2 to fp0; do not set inex2/ainex -| - .global spi_2 -spi_2: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mpi2 - bra ld_ppi2 -| -| Load either a +0 or +inf for plus/minus operand -| - .global szr_inf -szr_inf: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_pzero - bra ld_pinf -| -| Result is either an operr or +inf for plus/minus operand -| [Used by slogn, slognp1, slog10, and slog2] -| - .global sopr_inf -sopr_inf: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne t_operr - bra ld_pinf -| -| FLOGNP1 -| - .global sslognp1 -sslognp1: - fmovemx (%a0),%fp0-%fp0 - fcmpb #-1,%fp0 - fbgt slognp1 - fbeq t_dz2 |if = -1, divide by zero exception - fmovel #0,%FPSR |clr N flag - bra t_operr |take care of operands < -1 -| -| FETOXM1 -| - .global setoxm1i -setoxm1i: - btstb #sign_bit,LOCAL_EX(%a0) |check sign of source - bne ld_mone - bra ld_pinf -| -| FLOGN -| -| Test for 1.0 as an input argument, returning +zero. Also check -| the sign and return operr if negative. 
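Stated in C, the check the sslogn code below performs is simply: negative operands are an operand error, an input of exactly +1.0 returns +0, and everything else falls through to the log emulation. A sketch (zeros, infinities and NaNs never reach this point; they were already dispatched by tag):

#include <math.h>

static double flogn_norm(double x)
{
	if (x < 0.0)		/* negative operand -> t_operr (default NaN) */
		return nan("");
	if (x == 1.0)		/* ln(1) is exactly +0, no inex2 */
		return +0.0;
	return log(x);		/* the slogn emulation proper */
}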
-| - .global sslogn -sslogn: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slogn - cmpil #0x80000000,LOCAL_HI(%a0) - bne slogn - tstl LOCAL_LO(%a0) - bne slogn - fmovex PZERO,%fp0 - rts - - .global sslognd -sslognd: - btstb #sign_bit,LOCAL_EX(%a0) - beq slognd - bra t_operr |take care of operands < 0 - -| -| FLOG10 -| - .global sslog10 -sslog10: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slog10 - cmpil #0x80000000,LOCAL_HI(%a0) - bne slog10 - tstl LOCAL_LO(%a0) - bne slog10 - fmovex PZERO,%fp0 - rts - - .global sslog10d -sslog10d: - btstb #sign_bit,LOCAL_EX(%a0) - beq slog10d - bra t_operr |take care of operands < 0 - -| -| FLOG2 -| - .global sslog2 -sslog2: - btstb #sign_bit,LOCAL_EX(%a0) - bne t_operr |take care of operands < 0 - cmpiw #0x3fff,LOCAL_EX(%a0) |test for 1.0 input - bne slog2 - cmpil #0x80000000,LOCAL_HI(%a0) - bne slog2 - tstl LOCAL_LO(%a0) - bne slog2 - fmovex PZERO,%fp0 - rts - - .global sslog2d -sslog2d: - btstb #sign_bit,LOCAL_EX(%a0) - beq slog2d - bra t_operr |take care of operands < 0 - -| -| FMOD -| -pmodt: -| ;$21 fmod -| ;dtag,stag - .long smod | 00,00 norm,norm = normal - .long smod_oper | 00,01 norm,zero = nan with operr - .long smod_fpn | 00,10 norm,inf = fpn - .long smod_snan | 00,11 norm,nan = nan - .long smod_zro | 01,00 zero,norm = +-zero - .long smod_oper | 01,01 zero,zero = nan with operr - .long smod_zro | 01,10 zero,inf = +-zero - .long smod_snan | 01,11 zero,nan = nan - .long smod_oper | 10,00 inf,norm = nan with operr - .long smod_oper | 10,01 inf,zero = nan with operr - .long smod_oper | 10,10 inf,inf = nan with operr - .long smod_snan | 10,11 inf,nan = nan - .long smod_dnan | 11,00 nan,norm = nan - .long smod_dnan | 11,01 nan,zero = nan - .long smod_dnan | 11,10 nan,inf = nan - .long smod_dnan | 11,11 nan,nan = nan - - .global pmod -pmod: - clrb FPSR_QBYTE(%a6) | clear quotient field - bfextu STAG(%a6){#0:#3},%d0 |stag = d0 - bfextu DTAG(%a6){#0:#3},%d1 |dtag = d1 - -| -| Alias extended denorms to norms for the jump table. 
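The pmodt table above is a 4x4 dispatch keyed by the two 2-bit tags; the aliasing code that follows merely folds denorms onto the norm row before indexing. As a C construct (handler names are illustrative), with ((dtag << 2) | stag) reproducing the lslb/orb computation:

enum tag { T_NORM, T_ZERO, T_INF, T_NAN };	/* after denorm aliasing */

typedef int (*fmod_case)(void);
static int do_smod(void)  { return 0; }		/* normal computation */
static int do_operr(void) { return 1; }		/* nan with operand error */
static int do_fpn(void)   { return 2; }		/* return the dest operand */
static int do_nan(void)   { return 3; }		/* propagate the nan */
static int do_zero(void)  { return 4; }		/* signed zero result */

static const fmod_case pmodt_c[16] = {
	/* stag: norm    zero      inf       nan         dtag */
	do_smod,  do_operr, do_fpn,   do_nan,	/* norm */
	do_zero,  do_operr, do_zero,  do_nan,	/* zero */
	do_operr, do_operr, do_operr, do_nan,	/* inf  */
	do_nan,   do_nan,   do_nan,   do_nan,	/* nan  */
};

static int fmod_dispatch(enum tag dtag, enum tag stag)
{
	return pmodt_c[((unsigned)dtag << 2) | (unsigned)stag]();
}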
-|
-	bclrl	#2,%d0
-	bclrl	#2,%d1
-
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;Tag values:
-|	;00 = norm or denorm
-|	;01 = zero
-|	;10 = inf
-|	;11 = nan
-	lea	pmodt,%a1
-	movel	(%a1,%d1.w*4),%a1
-	jmp	(%a1)
-
-smod_snan:
-	bra	src_nan
-smod_dnan:
-	bra	dst_nan
-smod_oper:
-	bra	t_operr
-smod_zro:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	smod_zsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-smod_zsn:
-	btstl	#7,%d0		|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|else neg load -0
-
-smod_fpn:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	smod_fsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-smod_fsn:
-	tstb	DTAG(%a6)	|filter out denormal destination case
-	bpls	smod_nrm	|
-	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
-	bra	t_resdnrm	|force UNFL(but exact) result
-smod_nrm:
-	fmovel	USER_FPCR(%a6),%fpcr	|use user's rmode and precision
-	fmovex	FPTEMP(%a6),%fp0	|return dest to fp0
-	rts
-
-|
-| FREM
-|
-premt:
-|	;$25 frem
-|	;dtag,stag
-	.long	srem		| 00,00 norm,norm = normal
-	.long	srem_oper	| 00,01 norm,zero = nan with operr
-	.long	srem_fpn	| 00,10 norm,inf = fpn
-	.long	srem_snan	| 00,11 norm,nan = nan
-	.long	srem_zro	| 01,00 zero,norm = +-zero
-	.long	srem_oper	| 01,01 zero,zero = nan with operr
-	.long	srem_zro	| 01,10 zero,inf = +-zero
-	.long	srem_snan	| 01,11 zero,nan = nan
-	.long	srem_oper	| 10,00 inf,norm = nan with operr
-	.long	srem_oper	| 10,01 inf,zero = nan with operr
-	.long	srem_oper	| 10,10 inf,inf = nan with operr
-	.long	srem_snan	| 10,11 inf,nan = nan
-	.long	srem_dnan	| 11,00 nan,norm = nan
-	.long	srem_dnan	| 11,01 nan,zero = nan
-	.long	srem_dnan	| 11,10 nan,inf = nan
-	.long	srem_dnan	| 11,11 nan,nan = nan
-
-	.global	prem
-prem:
-	clrb	FPSR_QBYTE(%a6)	|clear quotient field
-	bfextu	STAG(%a6){#0:#3},%d0	|stag = d0
-	bfextu	DTAG(%a6){#0:#3},%d1	|dtag = d1
-|
-| Alias extended denorms to norms for the jump table.
-|
-	bclr	#2,%d0
-	bclr	#2,%d1
-
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;Tag values:
-|	;00 = norm or denorm
-|	;01 = zero
-|	;10 = inf
-|	;11 = nan
-	lea	premt,%a1
-	movel	(%a1,%d1.w*4),%a1
-	jmp	(%a1)
-
-srem_snan:
-	bra	src_nan
-srem_dnan:
-	bra	dst_nan
-srem_oper:
-	bra	t_operr
-srem_zro:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	srem_zsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-srem_zsn:
-	btstl	#7,%d0		|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|else neg load -0
-
-srem_fpn:
-	moveb	ETEMP(%a6),%d1	|get sign of src op
-	moveb	FPTEMP(%a6),%d0	|get sign of dst op
-	eorb	%d0,%d1		|get XOR of sign bits
-	btstl	#7,%d1		|test for sign
-	beqs	srem_fsn	|if clr, do not set sign bit
-	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
-srem_fsn:
-	tstb	DTAG(%a6)	|filter out denormal destination case
-	bpls	srem_nrm	|
-	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
-	bra	t_resdnrm	|force UNFL(but exact) result
-srem_nrm:
-	fmovel	USER_FPCR(%a6),%fpcr	|use user's rmode and precision
-	fmovex	FPTEMP(%a6),%fp0	|return dest to fp0
-	rts
-|
-| FSCALE
-|
-pscalet:
-|	;$26 fscale
-|	;dtag,stag
-	.long	sscale		| 00,00 norm,norm = result
-	.long	sscale		| 00,01 norm,zero = fpn
-	.long	scl_opr		| 00,10 norm,inf = nan with operr
-	.long	scl_snan	| 00,11 norm,nan = nan
-	.long	scl_zro		| 01,00 zero,norm = +-zero
-	.long	scl_zro		| 01,01 zero,zero = +-zero
-	.long	scl_opr		| 01,10 zero,inf = nan with operr
-	.long	scl_snan	| 01,11 zero,nan = nan
-	.long	scl_inf		| 10,00 inf,norm = +-inf
-	.long	scl_inf		| 10,01 inf,zero = +-inf
-	.long	scl_opr		| 10,10 inf,inf = nan with operr
-	.long	scl_snan	| 10,11 inf,nan = nan
-	.long	scl_dnan	| 11,00 nan,norm = nan
-	.long	scl_dnan	| 11,01 nan,zero = nan
-	.long	scl_dnan	| 11,10 nan,inf = nan
-	.long	scl_dnan	| 11,11 nan,nan = nan
-
-	.global	pscale
-pscale:
-	bfextu	STAG(%a6){#0:#3},%d0	|stag in d0
-	bfextu	DTAG(%a6){#0:#3},%d1	|dtag in d1
-	bclrl	#2,%d0		|alias denorm into norm
-	bclrl	#2,%d1		|alias denorm into norm
-	lslb	#2,%d1
-	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
-|	;dtag values	stag values:
-|	;000 = norm	00 = norm
-|	;001 = zero	01 = zero
-|	;010 = inf	10 = inf
-|	;011 = nan	11 = nan
-|	;100 = dnrm
-|
-|
-	leal	pscalet,%a1	|load start of jump table
-	movel	(%a1,%d1.w*4),%a1	|load a1 with label depending on tag
-	jmp	(%a1)		|go to the routine
-
-scl_opr:
-	bra	t_operr
-
-scl_dnan:
-	bra	dst_nan
-
-scl_zro:
-	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
-	beq	ld_pzero	|if pos then load +0
-	bra	ld_mzero	|if neg then load -0
-scl_inf:
-	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
-	beq	ld_pinf		|if pos then load +inf
-	bra	ld_minf		|else neg load -inf
-scl_snan:
-	bra	src_nan
-|
-| FSINCOS
-|
-	.global	ssincosz
-ssincosz:
-	btstb	#sign_bit,ETEMP(%a6)	|get sign
-	beqs	sincosp
-	fmovex	MZERO,%fp0
-	bras	sincoscom
-sincosp:
-	fmovex	PZERO,%fp0
-sincoscom:
-	fmovemx	PONE,%fp1-%fp1	|do not allow FPSR to be affected
-	bra	sto_cos		|store cosine result
-
-	.global	ssincosi
-ssincosi:
-	fmovex	QNAN,%fp1	|load NAN
-	bsr	sto_cos		|store cosine result
-	fmovex	QNAN,%fp0	|load NAN
-	bra	t_operr
-
-	.global	ssincosnan
-ssincosnan:
-	movel	ETEMP_EX(%a6),FP_SCR1(%a6)
-	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)
-	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)
-	bsetb	#signan_bit,FP_SCR1+4(%a6)
-	fmovemx	FP_SCR1(%a6),%fp1-%fp1
-	bsr	sto_cos
-	bra	src_nan
-|
-| This code forces default values for the
zero, inf, and nan cases -| in the transcendentals code. The CC bits must be set in the -| stacked FPSR to be correctly reported. -| -|**Returns +PI/2 - .global ld_ppi2 -ld_ppi2: - fmovex PPIBY2,%fp0 |load +pi/2 - bra t_inx2 |set inex2 exc - -|**Returns -PI/2 - .global ld_mpi2 -ld_mpi2: - fmovex MPIBY2,%fp0 |load -pi/2 - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra t_inx2 |set inex2 exc - -|**Returns +inf - .global ld_pinf -ld_pinf: - fmovex PINF,%fp0 |load +inf - orl #inf_mask,USER_FPSR(%a6) |set I bit - rts - -|**Returns -inf - .global ld_minf -ld_minf: - fmovex MINF,%fp0 |load -inf - orl #neg_mask+inf_mask,USER_FPSR(%a6) |set N and I bits - rts - -|**Returns +1 - .global ld_pone -ld_pone: - fmovex PONE,%fp0 |load +1 - rts - -|**Returns -1 - .global ld_mone -ld_mone: - fmovex MONE,%fp0 |load -1 - orl #neg_mask,USER_FPSR(%a6) |set N bit - rts - -|**Returns +0 - .global ld_pzero -ld_pzero: - fmovex PZERO,%fp0 |load +0 - orl #z_mask,USER_FPSR(%a6) |set Z bit - rts - -|**Returns -0 - .global ld_mzero -ld_mzero: - fmovex MZERO,%fp0 |load -0 - orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z bits - rts - - |end diff --git a/arch/m68k/fpsp040/gen_except.S b/arch/m68k/fpsp040/gen_except.S deleted file mode 100644 index 3642cb7e3641748743a799d164dd941be57aff3e..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/gen_except.S +++ /dev/null @@ -1,467 +0,0 @@ -| -| gen_except.sa 3.7 1/16/92 -| -| gen_except --- FPSP routine to detect reportable exceptions -| -| This routine compares the exception enable byte of the -| user_fpcr on the stack with the exception status byte -| of the user_fpsr. -| -| Any routine which may report an exceptions must load -| the stack frame in memory with the exceptional operand(s). -| -| Priority for exceptions is: -| -| Highest: bsun -| snan -| operr -| ovfl -| unfl -| dz -| inex2 -| Lowest: inex1 -| -| Note: The IEEE standard specifies that inex2 is to be -| reported if ovfl occurs and the ovfl enable bit is not -| set but the inex2 enable bit is. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
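The priority scheme above comes down to one operation: AND the FPCR enable byte with the FPSR exception byte and take the most significant surviving bit, which is what the bfffo instruction in the code below does. A C sketch with the byte layout as documented (bit 7 = bsun down to bit 0 = inex1); the example bytes are hypothetical:

#include <stdio.h>

static const char *const exc_name[8] = {
	"inex1", "inex2", "dz", "unfl", "ovfl", "operr", "snan", "bsun",
};

static int first_reportable(unsigned char enable, unsigned char status)
{
	unsigned char hits = enable & status;	/* andb, as in the code below */
	for (int bit = 7; bit >= 0; bit--)	/* bfffo: highest bit wins */
		if (hits & (1u << bit))
			return bit;
	return -1;				/* the no_match path */
}

int main(void)
{
	int b = first_reportable(0x14, 0x0c);	/* hypothetical bytes */
	puts(b < 0 ? "none" : exc_name[b]);	/* prints "dz" */
	return 0;
}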
- -GEN_EXCEPT: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_trace - |xref fpsp_done - |xref fpsp_fmt_error - -exc_tbl: - .long bsun_exc - .long commonE1 - .long commonE1 - .long ovfl_unfl - .long ovfl_unfl - .long commonE1 - .long commonE3 - .long commonE3 - .long no_match - - .global gen_except -gen_except: - cmpib #IDLE_SIZE-4,1(%a7) |test for idle frame - beq do_check |go handle idle frame - cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame - beqs unimp_x |go handle unimp frame - cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame - beqs unimp_x |go handle unimp frame - cmpib #BUSY_SIZE-4,1(%a7) |if size <> $60, fmt error - bnel fpsp_fmt_error - leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h -| ;equates will work -| Fix up the new busy frame with entries from the unimp frame -| - movel ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp - movel ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame - movel ETEMP_LO(%a6),ETEMP_LO(%a1) - movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp - movel CMDREG1B(%a6),%d0 |fix cmd1b to make it - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a1) |in the busy frame -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) - movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits - orl #sx_mask,E_BYTE(%a1) - bra do_clean - -| -| Frame is an unimp frame possible resulting from an fmove ,fp0 -| that caused an exception -| -| a1 is modified to point into the new frame allowing fpsp equates -| to be valid. -| -unimp_x: - cmpib #UNIMP_40_SIZE-4,1(%a7) |test for orig unimp frame - bnes test_rev - leal UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1 - bras unimp_con -test_rev: - cmpib #UNIMP_41_SIZE-4,1(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if not $28 or $30 - leal UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1 - -unimp_con: -| -| Fix up the new unimp frame with entries from the old unimp frame -| - movel CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) - bra do_clean - -| -| Frame is idle, so check for exceptions reported through -| USER_FPSR and set the unimp frame accordingly. -| A7 must be incremented to the point before the -| idle fsave vector to the unimp vector. -| - -do_check: - addl #4,%a7 |point A7 back to unimp frame -| -| Or in the FPSR from the emulation with the USER_FPSR on the stack. -| - fmovel %FPSR,%d0 - orl %d0,USER_FPSR(%a6) -| -| On a busy frame, we must clear the nmnexc bits. 
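The CMDREG1B-to-CMDREG3B rewrite performed above (and repeated below wherever a busy frame is built) is easier to follow in C: keep the bits common to both command-word layouts, then relocate two small bit fields into their busy-frame positions. Bit numbering follows the bfextu offsets, which count from the most significant bit of the longword:

static unsigned int cmd1b_to_cmd3b(unsigned int cmd1b)
{
	unsigned int cmd3b = cmd1b & 0x03c30000u;	/* bits shared by both layouts */
	unsigned int bit2  = (cmd1b >> 18) & 0x1;	/* bfextu {13:1}: bit 31-13 */
	unsigned int b345  = (cmd1b >> 19) & 0x7;	/* bfextu {10:3}: bits 21..19 */
	cmd3b |= bit2 << 21;				/* lsll #5 then swap */
	cmd3b |= b345 << 18;				/* lsll #2 then swap */
	return cmd3b;
}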
-| - cmpib #BUSY_SIZE-4,1(%a7) |check frame type - bnes check_fr |if busy, clr nmnexc - clrw NMNEXC(%a6) |clr nmnexc & nmcexc - btstb #5,CMDREG1B(%a6) |test for fmove out - bnes frame_com - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits - orl #sx_mask,E_BYTE(%a6) - bras frame_com -check_fr: - cmpb #UNIMP_40_SIZE-4,1(%a7) - beqs frame_com - clrw NMNEXC(%a6) -frame_com: - moveb FPCR_ENABLE(%a6),%d0 |get fpcr enable byte - andb FPSR_EXCEPT(%a6),%d0 |and in the fpsr exc byte - bfffo %d0{#24:#8},%d1 |test for first set bit - leal exc_tbl,%a0 |load jmp table address - subib #24,%d1 |normalize bit offset to 0-8 - movel (%a0,%d1.w*4),%a0 |load routine address based -| ;based on first enabled exc - jmp (%a0) |jump to routine -| -| Bsun is not possible in unimp or unsupp -| -bsun_exc: - bra do_clean -| -| The typical work to be done to the unimp frame to report an -| exception is to set the E1/E3 byte and clr the U flag. -| commonE1 does this for E1 exceptions, which are snan, -| operr, and dz. commonE3 does this for E3 exceptions, which -| are inex2 and inex1, and also clears the E1 exception bit -| left over from the unimp exception. -| -commonE1: - bsetb #E1,E_BYTE(%a6) |set E1 flag - bra commonE |go clean and exit - -commonE3: - tstb UFLG_TMP(%a6) |test flag for unsup/unimp state - bnes unsE3 -uniE3: - bsetb #E3,E_BYTE(%a6) |set E3 flag - bclrb #E1,E_BYTE(%a6) |clr E1 from unimp - bra commonE - -unsE3: - tstb RES_FLG(%a6) - bnes unsE3_0 -unsE3_1: - bsetb #E3,E_BYTE(%a6) |set E3 flag -unsE3_0: - bclrb #E1,E_BYTE(%a6) |clr E1 flag - movel CMDREG1B(%a6),%d0 - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a6) |in the busy frame - -commonE: - bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp - bra do_clean |go clean and exit -| -| No bits in the enable byte match existing exceptions. Check for -| the case of the ovfl exc without the ovfl enabled, but with -| inex2 enabled. -| -no_match: - btstb #inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case - beqs no_exc |if clear, exit - btstb #ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl - beqs no_exc |if clear, exit - bras ovfl_unfl |go to unfl_ovfl to determine if -| ;it is an unsupp or unimp exc - -| No exceptions are to be reported. If the instruction was -| unimplemented, no FPU restore is necessary. If it was -| unsupported, we must perform the restore. -no_exc: - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beqs uni_no_exc -uns_no_exc: - tstb RES_FLG(%a6) |check if frestore is needed - bne do_clean |if clear, no frestore needed -uni_no_exc: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bra finish_up -| -| Unsupported Data Type Handler: -| Ovfl: -| An fmoveout that results in an overflow is reported this way. -| Unfl: -| An fmoveout that results in an underflow is reported this way. -| -| Unimplemented Instruction Handler: -| Ovfl: -| Only scosh, setox, ssinh, stwotox, and scale can set overflow in -| this manner. -| Unfl: -| Stwotox, setox, and scale can set underflow in this manner. -| Any of the other Library Routines such that f(x)=x in which -| x is an extended denorm can report an underflow exception. -| It is the responsibility of the exception-causing exception -| to make sure that WBTEMP is correct. 
-| -| The exceptional operand is in FP_SCR1. -| -ovfl_unfl: - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beqs ofuf_con -| -| The caller was from an unsupported data type trap. Test if the -| caller set CU_ONLY. If so, the exceptional operand is expected in -| FPTEMP, rather than WBTEMP. -| - tstb CU_ONLY(%a6) |test if inst is cu-only - beq unsE3 -| move.w #$fe,CU_SAVEPC(%a6) - clrb CU_SAVEPC(%a6) - bsetb #E1,E_BYTE(%a6) |set E1 exception flag - movew ETEMP_EX(%a6),FPTEMP_EX(%a6) - movel ETEMP_HI(%a6),FPTEMP_HI(%a6) - movel ETEMP_LO(%a6),FPTEMP_LO(%a6) - bsetb #fptemp15_bit,DTAG(%a6) |set fpte15 - bclrb #UFLAG,T_BYTE(%a6) |clr U flag from unimp - bra do_clean |go clean and exit - -ofuf_con: - moveb (%a7),VER_TMP(%a6) |save version number - cmpib #BUSY_SIZE-4,1(%a7) |check for busy frame - beqs busy_fr |if unimp, grow to busy - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes try_41 |if not, test for rev frame - moveql #13,%d0 |need to zero 14 lwords - bras ofuf_fin -try_41: - cmpib #VER_41,(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if neither, exit with error - moveql #11,%d0 |need to zero 12 lwords - -ofuf_fin: - clrl (%a7) -loop1: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop1 - moveb VER_TMP(%a6),(%a7) - moveb #BUSY_SIZE-4,1(%a7) |write busy fmt word. -busy_fr: - movel FP_SCR1(%a6),WBTEMP_EX(%a6) |write - movel FP_SCR1+4(%a6),WBTEMP_HI(%a6) |exceptional op to - movel FP_SCR1+8(%a6),WBTEMP_LO(%a6) |wbtemp - bsetb #E3,E_BYTE(%a6) |set E3 flag - bclrb #E1,E_BYTE(%a6) |make sure E1 is clear - bclrb #UFLAG,T_BYTE(%a6) |clr U flag - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - movel CMDREG1B(%a6),%d0 |fix cmd1b to make it - andl #0x03c30000,%d0 |work for cmd3b - bfextu CMDREG1B(%a6){#13:#1},%d1 |extract bit 2 - lsll #5,%d1 - swap %d1 - orl %d1,%d0 |put it in the right place - bfextu CMDREG1B(%a6){#10:#3},%d1 |extract bit 3,4,5 - lsll #2,%d1 - swap %d1 - orl %d1,%d0 |put them in the right place - movel %d0,CMDREG3B(%a6) |in the busy frame - -| -| Check if the frame to be restored is busy or unimp. -|** NOTE *** Bug fix for errata (0d43b #3) -| If the frame is unimp, we must create a busy frame to -| fix the bug with the nmnexc bits in cases in which they -| are set by a previous instruction and not cleared by -| the save. The frame will be unimp only if the final -| instruction in an emulation routine caused the exception -| by doing an fmove ,fp0. The exception operand, in -| internal format, is in fptemp. -| -do_clean: - cmpib #UNIMP_40_SIZE-4,1(%a7) - bnes do_con - moveql #13,%d0 |in orig, need to zero 14 lwords - bras do_build -do_con: - cmpib #UNIMP_41_SIZE-4,1(%a7) - bnes do_restore |frame must be busy - moveql #11,%d0 |in rev, need to zero 12 lwords - -do_build: - moveb (%a7),VER_TMP(%a6) - clrl (%a7) -loop2: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop2 -| -| Use a1 as pointer into new frame. a6 is not correct if an unimp or -| busy frame was created as the result of an exception on the final -| instruction of an emulation routine. -| -| We need to set the nmcexc bits if the exception is E1. Otherwise, -| the exc taken will be inex2. 
-| - leal BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 for new frame - moveb VER_TMP(%a6),(%a7) |write busy fmt word - moveb #BUSY_SIZE-4,1(%a7) - movel FP_SCR1(%a6),WBTEMP_EX(%a1) |write - movel FP_SCR1+4(%a6),WBTEMP_HI(%a1) |exceptional op to - movel FP_SCR1+8(%a6),WBTEMP_LO(%a1) |wbtemp -| btst.b #E1,E_BYTE(%a1) -| beq.b do_restore - bfextu USER_FPSR(%a6){#17:#4},%d0 |get snan/operr/ovfl/unfl bits - bfins %d0,NMCEXC(%a1){#4:#4} |and insert them in nmcexc - movel USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits - orl #sx_mask,E_BYTE(%a1) - -do_restore: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - tstb RES_FLG(%a6) |RES_FLG indicates a "continuation" frame - beq cont - bsr bug1384 -cont: - unlk %a6 -| -| If trace mode enabled, then go to trace handler. This handler -| cannot have any fp instructions. If there are fp inst's and an -| exception has been restored into the machine then the exception -| will occur upon execution of the fp inst. This is not desirable -| in the kernel (supervisor mode). See MC68040 manual Section 9.3.8. -| -finish_up: - btstb #7,(%a7) |test T1 in SR - bnes g_trace - btstb #6,(%a7) |test T0 in SR - bnes g_trace - bral fpsp_done -| -| Change integer stack to look like trace stack -| The address of the instruction that caused the -| exception is already in the integer stack (is -| the same as the saved friar) -| -| If the current frame is already a 6-word stack then all -| that needs to be done is to change the vector# to TRACE. -| If the frame is only a 4-word stack (meaning we got here -| on an Unsupported data type exception), then we need to grow -| the stack an extra 2 words and get the FPIAR from the FPU. -| -g_trace: - bftst EXC_VEC-4(%sp){#0:#4} - bne g_easy - - subw #4,%sp | make room - movel 4(%sp),(%sp) - movel 8(%sp),4(%sp) - subw #BUSY_SIZE,%sp - fsave (%sp) - fmovel %fpiar,BUSY_SIZE+EXC_EA-4(%sp) - frestore (%sp) - addw #BUSY_SIZE,%sp - -g_easy: - movew #TRACE_VEC,EXC_VEC-4(%a7) - bral real_trace -| -| This is a work-around for hardware bug 1384. -| -bug1384: - link %a5,#0 - fsave -(%sp) - cmpib #0x41,(%sp) | check for correct frame - beq frame_41 - bgt nofix | if more advanced mask, do nada - -frame_40: - tstb 1(%sp) | check to see if idle - bne notidle -idle40: - clrl (%sp) | get rid of old fsave frame - movel %d1,USER_D1(%a6) | save d1 - movew #8,%d1 | place unimp frame instead -loop40: clrl -(%sp) - dbra %d1,loop40 - movel USER_D1(%a6),%d1 | restore d1 - movel #0x40280000,-(%sp) - frestore (%sp)+ - unlk %a5 - rts - -frame_41: - tstb 1(%sp) | check to see if idle - bne notidle -idle41: - clrl (%sp) | get rid of old fsave frame - movel %d1,USER_D1(%a6) | save d1 - movew #10,%d1 | place unimp frame instead -loop41: clrl -(%sp) - dbra %d1,loop41 - movel USER_D1(%a6),%d1 | restore d1 - movel #0x41300000,-(%sp) - frestore (%sp)+ - unlk %a5 - rts - -notidle: - bclrb #etemp15_bit,-40(%a5) - frestore (%sp)+ - unlk %a5 - rts - -nofix: - frestore (%sp)+ - unlk %a5 - rts - - |end diff --git a/arch/m68k/fpsp040/get_op.S b/arch/m68k/fpsp040/get_op.S deleted file mode 100644 index 64c36d79ef836ca97a8bbdbe741addcb4c1a452e..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/get_op.S +++ /dev/null @@ -1,675 +0,0 @@ -| -| get_op.sa 3.6 5/19/92 -| -| get_op.sa 3.5 4/26/91 -| -| Description: This routine is called by the unsupported format/data -| type exception handler ('unsupp' - vector 55) and the unimplemented -| instruction exception handler ('unimp' - vector 11). 
'get_op' -| determines the opclass (0, 2, or 3) and branches to the -| opclass handler routine. See 68881/2 User's Manual table 4-11 -| for a description of the opclasses. -| -| For UNSUPPORTED data/format (exception vector 55) and for -| UNIMPLEMENTED instructions (exception vector 11) the following -| applies: -| -| - For unnormalized numbers (opclass 0, 2, or 3) the -| number(s) is normalized and the operand type tag is updated. -| -| - For a packed number (opclass 2) the number is unpacked and the -| operand type tag is updated. -| -| - For denormalized numbers (opclass 0 or 2) the number(s) is not -| changed but passed to the next module. The next module for -| unimp is do_func, the next module for unsupp is res_func. -| -| For UNSUPPORTED data/format (exception vector 55) only the -| following applies: -| -| - If there is a move out with a packed number (opclass 3) the -| number is packed and written to user memory. For the other -| opclasses the number(s) are written back to the fsave stack -| and the instruction is then restored back into the '040. The -| '040 is then able to complete the instruction. -| -| For example: -| fadd.x fpm,fpn where the fpm contains an unnormalized number. -| The '040 takes an unsupported data trap and gets to this -| routine. The number is normalized, put back on the stack and -| then an frestore is done to restore the instruction back into -| the '040. The '040 then re-executes the fadd.x fpm,fpn with -| a normalized number in the source and the instruction is -| successful. -| -| Next consider if in the process of normalizing the un- -| normalized number it becomes a denormalized number. The -| routine which converts the unnorm to a norm (called mk_norm) -| detects this and tags the number as a denorm. The routine -| res_func sees the denorm tag and converts the denorm to a -| norm. The instruction is then restored back into the '040 -| which re_executes the instruction. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
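The mk_norm step described above amounts to the classic normalization loop: shift the significand left until the integer (j) bit is set or the exponent reaches zero, and tag the operand as a denorm if the j-bit never arrives. A C sketch for a 64-bit significand; the field widths and tag values mirror the L_SCR1{7:5} encoding but are otherwise illustrative:

#include <stdint.h>

enum optag { TAG_NORM = 0x0, TAG_DNRM = 0x4 };	/* L_SCR1{7:5} values */

static enum optag mk_norm_sketch(uint64_t *mant, int *exp)
{
	while (!(*mant & (1ULL << 63)) && *exp > 0) {
		*mant <<= 1;		/* shift toward the integer (j) bit */
		(*exp)--;
	}
	return (*mant & (1ULL << 63)) ? TAG_NORM : TAG_DNRM;
}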
- -GET_OP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .global PIRN,PIRZRM,PIRP - .global SMALRN,SMALRZRM,SMALRP - .global BIGRN,BIGRZRM,BIGRP - -PIRN: - .long 0x40000000,0xc90fdaa2,0x2168c235 |pi -PIRZRM: - .long 0x40000000,0xc90fdaa2,0x2168c234 |pi -PIRP: - .long 0x40000000,0xc90fdaa2,0x2168c235 |pi - -|round to nearest -SMALRN: - .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9a |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 -| round to zero;round to negative infinity -SMALRZRM: - .long 0x3ffd0000,0x9a209a84,0xfbcff798 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9a |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bb |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 -| round to positive infinity -SMALRP: - .long 0x3ffd0000,0x9a209a84,0xfbcff799 |log10(2) - .long 0x40000000,0xadf85458,0xa2bb4a9b |e - .long 0x3fff0000,0xb8aa3b29,0x5c17f0bc |log2(e) - .long 0x3ffd0000,0xde5bd8a9,0x37287195 |log10(e) - .long 0x00000000,0x00000000,0x00000000 |0.0 - -|round to nearest -BIGRN: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRN -PTENRN: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 - .long 0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 -|round to minus infinity -BIGRZRM: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ab |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac16 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRM -PTENRM: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59D |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D5 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CDF |10 ^ 128 - .long 0x43510000,0xAA7EEBFB,0x9DF9DE8D |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C6 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C17 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979A |10 ^ 4096 -|round to positive infinity -BIGRP: - .long 0x3ffe0000,0xb17217f7,0xd1cf79ac |ln(2) - .long 0x40000000,0x935d8ddd,0xaaa8ac17 |ln(10) - .long 0x3fff0000,0x80000000,0x00000000 |10 ^ 0 - - .global PTENRP -PTENRP: - .long 0x40020000,0xA0000000,0x00000000 |10 ^ 1 - .long 0x40050000,0xC8000000,0x00000000 |10 ^ 2 - .long 0x400C0000,0x9C400000,0x00000000 |10 ^ 4 - .long 0x40190000,0xBEBC2000,0x00000000 |10 ^ 8 - .long 0x40340000,0x8E1BC9BF,0x04000000 |10 ^ 16 - .long 0x40690000,0x9DC5ADA8,0x2B70B59E |10 ^ 32 - .long 0x40D30000,0xC2781F49,0xFFCFA6D6 |10 ^ 64 - .long 0x41A80000,0x93BA47C9,0x80E98CE0 |10 ^ 128 - .long 
0x43510000,0xAA7EEBFB,0x9DF9DE8E |10 ^ 256 - .long 0x46A30000,0xE319A0AE,0xA60E91C7 |10 ^ 512 - .long 0x4D480000,0xC9767586,0x81750C18 |10 ^ 1024 - .long 0x5A920000,0x9E8B3B5D,0xC53D5DE6 |10 ^ 2048 - .long 0x75250000,0xC4605202,0x8A20979B |10 ^ 4096 - - |xref nrm_zero - |xref decbin - |xref round - - .global get_op - .global uns_getop - .global uni_getop -get_op: - clrb DY_MO_FLG(%a6) - tstb UFLG_TMP(%a6) |test flag for unsupp/unimp state - beq uni_getop - -uns_getop: - btstb #direction_bit,CMDREG1B(%a6) - bne opclass3 |branch if a fmove out (any kind) - btstb #6,CMDREG1B(%a6) - beqs uns_notpacked - - bfextu CMDREG1B(%a6){#3:#3},%d0 - cmpb #3,%d0 - beq pack_source |check for a packed src op, branch if so -uns_notpacked: - bsr chk_dy_mo |set the dyadic/monadic flag - tstb DY_MO_FLG(%a6) - beqs src_op_ck |if monadic, go check src op -| ;else, check dst op (fall through) - - btstb #7,DTAG(%a6) - beqs src_op_ck |if dst op is norm, check src op - bras dst_ex_dnrm |else, handle destination unnorm/dnrm - -uni_getop: - bfextu CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields - cmpil #0x17,%d0 |if op class and size fields are $17, -| ;it is FMOVECR; if not, continue -| -| If the instruction is fmovecr, exit get_op. It is handled -| in do_func and smovecr.sa. -| - bne not_fmovecr |handle fmovecr as an unimplemented inst - rts - -not_fmovecr: - btstb #E1,E_BYTE(%a6) |if set, there is a packed operand - bne pack_source |check for packed src op, branch if so - -| The following lines of are coded to optimize on normalized operands - moveb STAG(%a6),%d0 - orb DTAG(%a6),%d0 |check if either of STAG/DTAG msb set - bmis dest_op_ck |if so, some op needs to be fixed - rts - -dest_op_ck: - btstb #7,DTAG(%a6) |check for unsupported data types in - beqs src_op_ck |the destination, if not, check src op - bsr chk_dy_mo |set dyadic/monadic flag - tstb DY_MO_FLG(%a6) | - beqs src_op_ck |if monadic, check src op -| -| At this point, destination has an extended denorm or unnorm. -| -dst_ex_dnrm: - movew FPTEMP_EX(%a6),%d0 |get destination exponent - andiw #0x7fff,%d0 |mask sign, check if exp = 0000 - beqs src_op_ck |if denorm then check source op. -| ;denorms are taken care of in res_func -| ;(unsupp) or do_func (unimp) -| ;else unnorm fall through - leal FPTEMP(%a6),%a0 |point a0 to dop - used in mk_norm - bsr mk_norm |go normalize - mk_norm returns: -| ;L_SCR1{7:5} = operand tag -| ; (000 = norm, 100 = denorm) -| ;L_SCR1{4} = fpte15 or ete15 -| ; 0 = exp > $3fff -| ; 1 = exp <= $3fff -| ;and puts the normalized num back -| ;on the fsave stack -| - moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15 -| ;to the fsave stack and fall -| ;through to check source operand -| -src_op_ck: - btstb #7,STAG(%a6) - beq end_getop |check for unsupported data types on the -| ;source operand - btstb #5,STAG(%a6) - bnes src_sd_dnrm |if bit 5 set, handle sgl/dbl denorms -| -| At this point only unnorms or extended denorms are possible. 
-| -src_ex_dnrm: - movew ETEMP_EX(%a6),%d0 |get source exponent - andiw #0x7fff,%d0 |mask sign, check if exp = 0000 - beq end_getop |if denorm then exit, denorms are -| ;handled in do_func - leal ETEMP(%a6),%a0 |point a0 to sop - used in mk_norm - bsr mk_norm |go normalize - mk_norm returns: -| ;L_SCR1{7:5} = operand tag -| ; (000 = norm, 100 = denorm) -| ;L_SCR1{4} = fpte15 or ete15 -| ; 0 = exp > $3fff -| ; 1 = exp <= $3fff -| ;and puts the normalized num back -| ;on the fsave stack -| - moveb L_SCR1(%a6),STAG(%a6) |write the new tag & ete15 - rts |end_getop - -| -| At this point, only single or double denorms are possible. -| If the inst is not fmove, normalize the source. If it is, -| do nothing to the input. -| -src_sd_dnrm: - btstb #4,CMDREG1B(%a6) |differentiate between sgl/dbl denorm - bnes is_double -is_single: - movew #0x3f81,%d1 |write bias for sgl denorm - bras common |goto the common code -is_double: - movew #0x3c01,%d1 |write the bias for a dbl denorm -common: - btstb #sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa - beqs pos - bset #15,%d1 |set sign bit because it is negative -pos: - movew %d1,ETEMP_EX(%a6) -| ;put exponent on stack - - movew CMDREG1B(%a6),%d1 - andw #0xe3ff,%d1 |clear out source specifier - orw #0x0800,%d1 |set source specifier to extended prec - movew %d1,CMDREG1B(%a6) |write back to the command word in stack -| ;this is needed to fix unsupp data stack - leal ETEMP(%a6),%a0 |point a0 to sop - - bsr mk_norm |convert sgl/dbl denorm to norm - moveb L_SCR1(%a6),STAG(%a6) |put tag into source tag reg - d0 - rts |end_getop -| -| At this point, the source is definitely packed, whether -| instruction is dyadic or monadic is still unknown -| -pack_source: - movel FPTEMP_LO(%a6),ETEMP(%a6) |write ms part of packed -| ;number to etemp slot - bsr chk_dy_mo |set dyadic/monadic flag - bsr unpack - - tstb DY_MO_FLG(%a6) - beqs end_getop |if monadic, exit -| ;else, fix FPTEMP -pack_dya: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 |set up d0 as a dynamic register mask - fmovemx %d0,FPTEMP(%a6) |write to FPTEMP - - btstb #7,DTAG(%a6) |check dest tag for unnorm or denorm - bne dst_ex_dnrm |else, handle the unnorm or ext denorm -| -| Dest is not denormalized. Check for norm, and set fpte15 -| accordingly. -| - moveb DTAG(%a6),%d0 - andib #0xf0,%d0 |strip to only dtag:fpte15 - tstb %d0 |check for normalized value - bnes end_getop |if inf/nan/zero leave get_op - movew FPTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x3fff,%d0 |check if fpte15 needs setting - bges end_getop |if >= $3fff, leave fpte15=0 - orb #0x10,DTAG(%a6) - bras end_getop - -| -| At this point, it is either an fmoveout packed, unnorm or denorm -| -opclass3: - clrb DY_MO_FLG(%a6) |set dyadic/monadic flag to monadic - bfextu CMDREG1B(%a6){#4:#2},%d0 - cmpib #3,%d0 - bne src_ex_dnrm |if not equal, must be unnorm or denorm -| ;else it is a packed move out -| ;exit -end_getop: - rts - -| -| Sets the DY_MO_FLG correctly. This is used only on if it is an -| unsupported data type exception. Set if dyadic. 
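The rule chk_dy_mo implements below: bit 5 of the command word clear means monadic; bit 5 set with bit 4 clear means dyadic; and the lone exception is extension $38, fcmp, which is dyadic despite bit 4 being set. In C:

static int is_dyadic(unsigned short cmdreg1b)
{
	if (!(cmdreg1b & (1u << 5)))
		return 0;			/* bit 5 clear: monadic */
	if (!(cmdreg1b & (1u << 4)))
		return 1;			/* bit 5 set, bit 4 clear: dyadic */
	return (cmdreg1b & 0x7f) == 0x38;	/* fcmp: the dyadic exception */
}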
-| -chk_dy_mo: - movew CMDREG1B(%a6),%d0 - btstl #5,%d0 |testing extension command word - beqs set_mon |if bit 5 = 0 then monadic - btstl #4,%d0 |know that bit 5 = 1 - beqs set_dya |if bit 4 = 0 then dyadic - andiw #0x007f,%d0 |get rid of all but extension bits {6:0} - cmpiw #0x0038,%d0 |if extension = $38 then fcmp (dyadic) - bnes set_mon -set_dya: - st DY_MO_FLG(%a6) |set the inst flag type to dyadic - rts -set_mon: - clrb DY_MO_FLG(%a6) |set the inst flag type to monadic - rts -| -| MK_NORM -| -| Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl -| exception if denorm. -| -| CASE opclass 0x0 unsupp -| mk_norm till msb set -| set tag = norm -| -| CASE opclass 0x0 unimp -| mk_norm till msb set or exp = 0 -| if integer bit = 0 -| tag = denorm -| else -| tag = norm -| -| CASE opclass 011 unsupp -| mk_norm till msb set or exp = 0 -| if integer bit = 0 -| tag = denorm -| set unfl_nmcexe = 1 -| else -| tag = norm -| -| if exp <= $3fff -| set ete15 or fpte15 = 1 -| else set ete15 or fpte15 = 0 - -| input: -| a0 = points to operand to be normalized -| output: -| L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm) -| L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff) -| the normalized operand is placed back on the fsave stack -mk_norm: - clrl L_SCR1(%a6) - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |transform into internal extended format - - cmpib #0x2c,1+EXC_VEC(%a6) |check if unimp - bnes uns_data |branch if unsupp - bsr uni_inst |call if unimp (opclass 0x0) - bras reload -uns_data: - btstb #direction_bit,CMDREG1B(%a6) |check transfer direction - bnes bit_set |branch if set (opclass 011) - bsr uns_opx |call if opclass 0x0 - bras reload -bit_set: - bsr uns_op3 |opclass 011 -reload: - cmpw #0x3fff,LOCAL_EX(%a0) |if exp > $3fff - bgts end_mk | fpte15/ete15 already set to 0 - bsetb #4,L_SCR1(%a6) |else set fpte15/ete15 to 1 -| ;calling routine actually sets the -| ;value on the stack (along with the -| ;tag), since this routine doesn't -| ;know if it should set ete15 or fpte15 -| ;ie, it doesn't know if this is the -| ;src op or dest op. -end_mk: - bfclr LOCAL_SGN(%a0){#0:#8} - beqs end_mk_pos - bsetb #sign_bit,LOCAL_EX(%a0) |convert back to IEEE format -end_mk_pos: - rts -| -| CASE opclass 011 unsupp -| -uns_op3: - bsr nrm_zero |normalize till msb = 1 or exp = zero - btstb #7,LOCAL_HI(%a0) |if msb = 1 - bnes no_unfl |then branch -set_unfl: - orw #dnrm_tag,L_SCR1(%a6) |set denorm tag - bsetb #unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit -no_unfl: - rts -| -| CASE opclass 0x0 unsupp -| -uns_opx: - bsr nrm_zero |normalize the number - btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set - beqs uns_den |if clear then now have a denorm -uns_nrm: - orb #norm_tag,L_SCR1(%a6) |set tag to norm - rts -uns_den: - orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm - rts -| -| CASE opclass 0x0 unimp -| -uni_inst: - bsr nrm_zero - btstb #7,LOCAL_HI(%a0) |check if integer bit (j-bit) is set - beqs uni_den |if clear then now have a denorm -uni_nrm: - orb #norm_tag,L_SCR1(%a6) |set tag to norm - rts -uni_den: - orb #dnrm_tag,L_SCR1(%a6) |set tag to denorm - rts - -| -| Decimal to binary conversion -| -| Special cases of inf and NaNs are completed outside of decbin. -| If the input is an snan, the snan bit is not set. 
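The special-case screen that unpack applies below tests the first word of the packed operand: an exponent field of $FFF together with SE and both y bits set marks inf or NaN, and a non-zero mantissa then selects NaN. A C sketch, with field positions taken from the bfextu offsets:

#include <stdint.h>

enum special { NOT_SPECIAL, IS_INF, IS_NAN };

/* w0: first 16 bits of the packed operand (sign bits + 3 exp digits);
 * hi/lo: the two mantissa longwords */
static enum special packed_special(uint16_t w0, uint32_t hi, uint32_t lo)
{
	unsigned int exp3 = w0 & 0xfff;		/* bfextu {20:12} */
	unsigned int se_y = (w0 >> 12) & 0x7;	/* bfextu {17:3}: SE + y bits */
	if (exp3 != 0xfff || se_y != 0x7)
		return NOT_SPECIAL;		/* falls to the try_zero path */
	return (hi | lo) ? IS_NAN : IS_INF;	/* non-zero mantissa: NaN */
}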
-| -| input: -| ETEMP(a6) - points to packed decimal string in memory -| output: -| fp0 - contains packed string converted to extended precision -| ETEMP - same as fp0 -unpack: - movew CMDREG1B(%a6),%d0 |examine command word, looking for fmove's - andw #0x3b,%d0 - beq move_unpack |special handling for fmove: must set FPSR_CC - - movew ETEMP(%a6),%d0 |get word with inf information - bfextu %d0{#20:#12},%d1 |get exponent into d1 - cmpiw #0x0fff,%d1 |test for inf or NaN - bnes try_zero |if not equal, it is not special - bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 - cmpiw #7,%d1 |SE and y bits must be on for special - bnes try_zero |if not on, it is not special -|input is of the special cases of inf and NaN - tstl ETEMP_HI(%a6) |check ms mantissa - bnes fix_nan |if non-zero, it is a NaN - tstl ETEMP_LO(%a6) |check ls mantissa - bnes fix_nan |if non-zero, it is a NaN - bra finish |special already on stack -fix_nan: - btstb #signan_bit,ETEMP_HI(%a6) |test for snan - bne finish - orl #snaniop_mask,USER_FPSR(%a6) |always set snan if it is so - bra finish -try_zero: - movew ETEMP_EX+2(%a6),%d0 |get word 4 - andiw #0x000f,%d0 |clear all but last ni(y)bble - tstw %d0 |check for zero. - bne not_spec - tstl ETEMP_HI(%a6) |check words 3 and 2 - bne not_spec - tstl ETEMP_LO(%a6) |check words 1 and 0 - bne not_spec - tstl ETEMP(%a6) |test sign of the zero - bges pos_zero - movel #0x80000000,ETEMP(%a6) |write neg zero to etemp - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bra finish -pos_zero: - clrl ETEMP(%a6) - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bra finish - -not_spec: - fmovemx %fp0-%fp1,-(%a7) |save fp0 - decbin returns in it - bsr decbin - fmovex %fp0,ETEMP(%a6) |put the unpacked sop in the fsave stack - fmovemx (%a7)+,%fp0-%fp1 - fmovel #0,%FPSR |clr fpsr from decbin - bra finish - -| -| Special handling for packed move in: Same results as all other -| packed cases, but we must set the FPSR condition codes properly. -| -move_unpack: - movew ETEMP(%a6),%d0 |get word with inf information - bfextu %d0{#20:#12},%d1 |get exponent into d1 - cmpiw #0x0fff,%d1 |test for inf or NaN - bnes mtry_zero |if not equal, it is not special - bfextu %d0{#17:#3},%d1 |get SE and y bits into d1 - cmpiw #7,%d1 |SE and y bits must be on for special - bnes mtry_zero |if not on, it is not special -|input is of the special cases of inf and NaN - tstl ETEMP_HI(%a6) |check ms mantissa - bnes mfix_nan |if non-zero, it is a NaN - tstl ETEMP_LO(%a6) |check ls mantissa - bnes mfix_nan |if non-zero, it is a NaN -|input is inf - orl #inf_mask,USER_FPSR(%a6) |set I bit - tstl ETEMP(%a6) |check sign - bge finish - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra finish |special already on stack -mfix_nan: - orl #nan_mask,USER_FPSR(%a6) |set NaN bit - moveb #nan_tag,STAG(%a6) |set stag to NaN - btstb #signan_bit,ETEMP_HI(%a6) |test for snan - bnes mn_snan - orl #snaniop_mask,USER_FPSR(%a6) |set snan bit - btstb #snan_bit,FPCR_ENABLE(%a6) |test for snan enabled - bnes mn_snan - bsetb #signan_bit,ETEMP_HI(%a6) |force snans to qnans -mn_snan: - tstl ETEMP(%a6) |check for sign - bge finish |if clr, go on - orl #neg_mask,USER_FPSR(%a6) |set N bit - bra finish - -mtry_zero: - movew ETEMP_EX+2(%a6),%d0 |get word 4 - andiw #0x000f,%d0 |clear all but last ni(y)bble - tstw %d0 |check for zero. 
- bnes mnot_spec - tstl ETEMP_HI(%a6) |check words 3 and 2 - bnes mnot_spec - tstl ETEMP_LO(%a6) |check words 1 and 0 - bnes mnot_spec - tstl ETEMP(%a6) |test sign of the zero - bges mpos_zero - orl #neg_mask+z_mask,USER_FPSR(%a6) |set N and Z - movel #0x80000000,ETEMP(%a6) |write neg zero to etemp - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bras finish -mpos_zero: - orl #z_mask,USER_FPSR(%a6) |set Z - clrl ETEMP(%a6) - clrl ETEMP_HI(%a6) - clrl ETEMP_LO(%a6) - bras finish - -mnot_spec: - fmovemx %fp0-%fp1,-(%a7) |save fp0 ,fp1 - decbin returns in fp0 - bsr decbin - fmovex %fp0,ETEMP(%a6) -| ;put the unpacked sop in the fsave stack - fmovemx (%a7)+,%fp0-%fp1 - -finish: - movew CMDREG1B(%a6),%d0 |get the command word - andw #0xfbff,%d0 |change the source specifier field to -| ;extended (was packed). - movew %d0,CMDREG1B(%a6) |write command word back to fsave stack -| ;we need to do this so the 040 will -| ;re-execute the inst. without taking -| ;another packed trap. - -fix_stag: -|Converted result is now in etemp on fsave stack, now set the source -|tag (stag) -| if (ete =$7fff) then INF or NAN -| if (etemp = $x.0----0) then -| stag = INF -| else -| stag = NAN -| else -| if (ete = $0000) then -| stag = ZERO -| else -| stag = NORM -| -| Note also that the etemp_15 bit (just right of the stag) must -| be set accordingly. -| - movew ETEMP_EX(%a6),%d1 - andiw #0x7fff,%d1 |strip sign - cmpw #0x7fff,%d1 - bnes z_or_nrm - movel ETEMP_HI(%a6),%d1 - bnes is_nan - movel ETEMP_LO(%a6),%d1 - bnes is_nan -is_inf: - moveb #0x40,STAG(%a6) - movel #0x40,%d0 - rts -is_nan: - moveb #0x60,STAG(%a6) - movel #0x60,%d0 - rts -z_or_nrm: - tstw %d1 - bnes is_nrm -is_zro: -| For a zero, set etemp_15 - moveb #0x30,STAG(%a6) - movel #0x20,%d0 - rts -is_nrm: -| For a norm, check if the exp <= $3fff; if so, set etemp_15 - cmpiw #0x3fff,%d1 - bles set_bit15 - moveb #0,STAG(%a6) - bras end_is_nrm -set_bit15: - moveb #0x10,STAG(%a6) -end_is_nrm: - movel #0,%d0 -end_fix: - rts - -end_get: - rts - |end diff --git a/arch/m68k/fpsp040/kernel_ex.S b/arch/m68k/fpsp040/kernel_ex.S deleted file mode 100644 index 45bcf3455d341e6ae9ebdbdd28e36caa76038f94..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/kernel_ex.S +++ /dev/null @@ -1,493 +0,0 @@ -| -| kernel_ex.sa 3.3 12/19/90 -| -| This file contains routines to force exception status in the -| fpu for exceptional cases detected or reported within the -| transcendental functions. Typically, the t_xx routine will -| set the appropriate bits in the USER_FPSR word on the stack. -| The bits are tested in gen_except.sa to determine if an exceptional -| situation needs to be created on return from the FPSP. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
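As an orientation for the t_dz/t_dz2 code that follows: with the DZ trap disabled, the routine hands back a correctly signed infinity and sets the DZ and accrued-DZ (and N) bits; with the trap enabled, it sets only the status bits and flags sto_res not to touch the result register. A C sketch; the bit positions and flag plumbing are illustrative, not the real FPSR layout:

#include <math.h>

struct fpsr_sketch { unsigned int status, accrued, cc; };

static double t_dz_sketch(int neg, int dz_enabled,
			  struct fpsr_sketch *sr, int *store_result)
{
	sr->status  |= 1u << 2;		/* DZ status (illustrative position) */
	sr->accrued |= 1u << 2;		/* accrued DZ */
	if (neg)
		sr->cc |= 1u << 3;	/* N condition code */
	if (dz_enabled) {		/* trap enabled: leave fp0 alone */
		*store_result = 0;	/* STORE_FLG in the asm */
		return 0.0;
	}
	*store_result = 1;
	return neg ? -INFINITY : INFINITY;	/* signed default result */
}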
- -KERNEL_EX: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -mns_inf: .long 0xffff0000,0x00000000,0x00000000 -pls_inf: .long 0x7fff0000,0x00000000,0x00000000 -nan: .long 0x7fff0000,0xffffffff,0xffffffff -huge: .long 0x7ffe0000,0xffffffff,0xffffffff - - |xref ovf_r_k - |xref unf_sub - |xref nrm_set - - .global t_dz - .global t_dz2 - .global t_operr - .global t_unfl - .global t_ovfl - .global t_ovfl2 - .global t_inx2 - .global t_frcinx - .global t_extdnrm - .global t_resdnrm - .global dst_nan - .global src_nan -| -| DZ exception -| -| -| if dz trap disabled -| store properly signed inf (use sign of etemp) into fp0 -| set FPSR exception status dz bit, condition code -| inf bit, and accrued dz bit -| return -| frestore the frame into the machine (done by unimp_hd) -| -| else dz trap enabled -| set exception status bit & accrued bits in FPSR -| set flag to disable sto_res from corrupting fp register -| return -| frestore the frame into the machine (done by unimp_hd) -| -| t_dz2 is used by monadic functions such as flogn (from do_func). -| t_dz is used by monadic functions such as satanh (from the -| transcendental function). -| -t_dz2: - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR - fmovel #0,%FPSR |clr status bits (Z set) - btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled - bnes dz_ena_end - bras m_inf |flogx always returns -inf -t_dz: - fmovel #0,%FPSR |clr status bits (Z set) - btstb #dz_bit,FPCR_ENABLE(%a6) |test FPCR for dz exc enabled - bnes dz_ena -| -| dz disabled -| - btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos - beqs p_inf |branch if pos sign - -m_inf: - fmovemx mns_inf,%fp0-%fp0 |load -inf - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR - bras set_fpsr -p_inf: - fmovemx pls_inf,%fp0-%fp0 |load +inf -set_fpsr: - orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ - rts -| -| dz enabled -| -dz_ena: - btstb #sign_bit,ETEMP_EX(%a6) |check sign for neg or pos - beqs dz_ena_end - bsetb #neg_bit,FPSR_CC(%a6) |set neg bit in FPSR -dz_ena_end: - orl #dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ - st STORE_FLG(%a6) - rts -| -| OPERR exception -| -| if (operr trap disabled) -| set FPSR exception status operr bit, condition code -| nan bit; Store default NAN into fp0 -| frestore the frame into the machine (done by unimp_hd) -| -| else (operr trap enabled) -| set FPSR exception status operr bit, accrued operr bit -| set flag to disable sto_res from corrupting fp register -| frestore the frame into the machine (done by unimp_hd) -| -t_operr: - orl #opnan_mask,USER_FPSR(%a6) |set NaN, OPERR, AIOP - - btstb #operr_bit,FPCR_ENABLE(%a6) |test FPCR for operr enabled - bnes op_ena - - fmovemx nan,%fp0-%fp0 |load default nan - rts -op_ena: - st STORE_FLG(%a6) |do not corrupt destination - rts - -| -| t_unfl --- UNFL exception -| -| This entry point is used by all routines requiring unfl, inex2, -| aunfl, and ainex to be set on exit. -| -| On entry, a0 points to the exceptional operand. The final exceptional -| operand is built in FP_SCR1 and only the sign from the original operand -| is used. 
-| -t_unfl: - clrl FP_SCR1(%a6) |set exceptional operand to zero - clrl FP_SCR1+4(%a6) - clrl FP_SCR1+8(%a6) - tstb (%a0) |extract sign from caller's exop - bpls unfl_signok - bset #sign_bit,FP_SCR1(%a6) -unfl_signok: - leal FP_SCR1(%a6),%a0 - orl #unfinx_mask,USER_FPSR(%a6) -| ;set UNFL, INEX2, AUNFL, AINEX -unfl_con: - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs unfl_dis - -unfl_ena: - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - bsetb #sticky_bit,STICKY(%a6) |set sticky bit - - bclrb #E1,E_BYTE(%a6) - -unfl_dis: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision - - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - - bsr unf_sub |returns IEEE result at a0 -| ;and sets FPSR_CC accordingly - - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs unfl_fin - - bsetb #sign_bit,LOCAL_EX(%a0) - bsetb #sign_bit,FP_SCR1(%a6) |set sign bit of exc operand - -unfl_fin: - fmovemx (%a0),%fp0-%fp0 |store result in fp0 - rts - - -| -| t_ovfl2 --- OVFL exception (without inex2 returned) -| -| This entry is used by scale to force catastrophic overflow. The -| ovfl, aovfl, and ainex bits are set, but not the inex2 bit. -| -t_ovfl2: - orl #ovfl_inx_mask,USER_FPSR(%a6) - movel ETEMP(%a6),FP_SCR1(%a6) - movel ETEMP_HI(%a6),FP_SCR1+4(%a6) - movel ETEMP_LO(%a6),FP_SCR1+8(%a6) -| -| Check for single or double round precision. If single, check if -| the lower 40 bits of ETEMP are zero; if not, set inex2. If double, -| check if the lower 21 bits are zero; if not, set inex2. -| - moveb FPCR_MODE(%a6),%d0 - andib #0xc0,%d0 - beq t_work |if extended, finish ovfl processing - cmpib #0x40,%d0 |test for single - bnes t_dbl -t_sgl: - tstb ETEMP_LO(%a6) - bnes t_setinx2 - movel ETEMP_HI(%a6),%d0 - andil #0xff,%d0 |look at only lower 8 bits - bnes t_setinx2 - bra t_work -t_dbl: - movel ETEMP_LO(%a6),%d0 - andil #0x7ff,%d0 |look at only lower 11 bits - beq t_work -t_setinx2: - orl #inex2_mask,USER_FPSR(%a6) - bras t_work -| -| t_ovfl --- OVFL exception -| -|** Note: the exc operand is returned in ETEMP. -| -t_ovfl: - orl #ovfinx_mask,USER_FPSR(%a6) -t_work: - btstb #ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled - beqs ovf_dis - -ovf_ena: - clrl FP_SCR1(%a6) |set exceptional operand - clrl FP_SCR1+4(%a6) - clrl FP_SCR1+8(%a6) - - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bclrb #wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15 - bsetb #sticky_bit,STICKY(%a6) |set sticky bit - - bclrb #E1,E_BYTE(%a6) -| ;fall through to disabled case - -| For disabled overflow call 'ovf_r_k'. This routine loads the -| correct result based on the rounding precision, destination -| format, rounding mode and sign. -| -ovf_dis: - bsr ovf_r_k |returns unsigned ETEMP_EX -| ;and sets FPSR_CC accordingly. - bfclr ETEMP_SGN(%a6){#0:#8} |fix sign - beqs ovf_pos - bsetb #sign_bit,ETEMP_EX(%a6) - bsetb #sign_bit,FP_SCR1(%a6) |set exceptional operand sign -ovf_pos: - fmovemx ETEMP(%a6),%fp0-%fp0 |move the result to fp0 - rts - - -| -| INEX2 exception -| -| The inex2 and ainex bits are set. -| -t_inx2: - orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX - rts - -| -| Force Inex2 -| -| This routine is called by the transcendental routines to force -| the inex2 exception bits set in the FPSR. If the underflow bit -| is set, but the underflow trap was not taken, the aunfl bit in -| the FPSR must be set. 
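A note on the t_ovfl2 inexactness test above: extended precision carries 64 mantissa bits, single keeps 24 and double keeps 53, so the code tests the 40 discarded bits for single (all of ETEMP_LO plus the low 8 bits of ETEMP_HI) and the 11 discarded bits for double (the 0x7ff mask; the comment's "21 bits" appears to be a typo for 11). In C, with an invented function name:

    #include <stdint.h>

    /* Nonzero discarded mantissa bits mean the narrower result is inexact. */
    int discarded_bits_nonzero(uint32_t man_hi, uint32_t man_lo, int prec)
    {
        if (prec == 1)                                /* single keeps 24 of 64 bits */
            return ((man_hi & 0xffu) | man_lo) != 0;  /* low 8 of hi + all of lo */
        if (prec == 2)                                /* double keeps 53 of 64 bits */
            return (man_lo & 0x7ffu) != 0;            /* low 11 bits */
        return 0;                                     /* extended: nothing discarded */
    }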
-| -t_frcinx: - orl #inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX - btstb #unfl_bit,FPSR_EXCEPT(%a6) |test for unfl bit set - beqs no_uacc1 |if clear, do not set aunfl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_uacc1: - rts - -| -| DST_NAN -| -| Determine if the destination nan is signalling or non-signalling, -| and set the FPSR bits accordingly. See the MC68040 User's Manual -| section 3.2.2.5 NOT-A-NUMBERS. -| -dst_nan: - btstb #sign_bit,FPTEMP_EX(%a6) |test sign of nan - beqs dst_pos |if clr, it was positive - bsetb #neg_bit,FPSR_CC(%a6) |set N bit -dst_pos: - btstb #signan_bit,FPTEMP_HI(%a6) |check if signalling - beqs dst_snan |branch if signalling - - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex FPTEMP(%a6),%fp0 |return the non-signalling nan -| -| Check the source nan. If it is signalling, snan will be reported. -| - moveb STAG(%a6),%d0 - andib #0xe0,%d0 - cmpib #0x60,%d0 - bnes no_snan - btstb #signan_bit,ETEMP_HI(%a6) |check if signalling - bnes no_snan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP -no_snan: - rts - -dst_snan: - btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled - beqs dst_dis |branch if disabled - - orb #nan_tag,DTAG(%a6) |set up dtag for nan - st STORE_FLG(%a6) |do not store a result - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -dst_dis: - bsetb #signan_bit,FPTEMP_HI(%a6) |set SNAN bit in sop - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex FPTEMP(%a6),%fp0 |load non-sign. nan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -| -| SRC_NAN -| -| Determine if the source nan is signalling or non-signalling, -| and set the FPSR bits accordingly. See the MC68040 User's Manual -| section 3.2.2.5 NOT-A-NUMBERS. -| -src_nan: - btstb #sign_bit,ETEMP_EX(%a6) |test sign of nan - beqs src_pos |if clr, it was positive - bsetb #neg_bit,FPSR_CC(%a6) |set N bit -src_pos: - btstb #signan_bit,ETEMP_HI(%a6) |check if signalling - beqs src_snan |branch if signalling - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex ETEMP(%a6),%fp0 |return the non-signalling nan - rts - -src_snan: - btstb #snan_bit,FPCR_ENABLE(%a6) |check if trap enabled - beqs src_dis |branch if disabled - bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop - orb #norm_tag,DTAG(%a6) |set up dtag for norm - orb #nan_tag,STAG(%a6) |set up stag for nan - st STORE_FLG(%a6) |do not store a result - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -src_dis: - bsetb #signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop - fmovel %d1,%fpcr |restore user's rmode/prec - fmovex ETEMP(%a6),%fp0 |load non-sign. nan - orl #snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP - rts - -| -| For all functions that have a denormalized input and that f(x)=x, -| this is the entry point -| -t_extdnrm: - orl #unfinx_mask,USER_FPSR(%a6) -| ;set UNFL, INEX2, AUNFL, AINEX - bras xdnrm_con -| -| Entry point for scale with extended denorm. The function does -| not set inex2, aunfl, or ainex. -| -t_resdnrm: - orl #unfl_mask,USER_FPSR(%a6) - -xdnrm_con: - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs xdnrm_dis - -| -| If exceptions are enabled, the additional task of setting up WBTEMP -| is needed so that when the underflow exception handler is entered, -| the user perceives no difference between what the 040 provides vs. -| what the FPSP provides. 
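The signalling/quiet distinction used by dst_nan and src_nan above hinges on one mantissa bit: signan_bit is defined in fpsp.h, and from its use against the high mantissa word it corresponds to mantissa bit 62, just below the extended format's explicit integer bit; that mapping is an inference here, not spelled out in this file. A clear bit means signalling, and the disabled-trap paths set it to deliver a quiet NaN:

    #include <stdint.h>

    #define SIGNAN_BIT 30u   /* assumed: mantissa bit 62 within the high 32-bit word */

    int is_signalling(uint32_t man_hi)   /* high mantissa word of the NaN */
    {
        return !(man_hi & (1u << SIGNAN_BIT));   /* clear = signalling */
    }

    uint32_t quieten(uint32_t man_hi)    /* what the trap-disabled path stores */
    {
        return man_hi | (1u << SIGNAN_BIT);
    }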
-| -xdnrm_ena: - movel %a0,-(%a7) - - movel LOCAL_EX(%a0),FP_SCR1(%a6) - movel LOCAL_HI(%a0),FP_SCR1+4(%a6) - movel LOCAL_LO(%a0),FP_SCR1+8(%a6) - - lea FP_SCR1(%a6),%a0 - - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - tstw LOCAL_EX(%a0) |check if input is denorm - beqs xdnrm_dn |if so, skip nrm_set - bsr nrm_set |normalize the result (exponent -| ;will be negative -xdnrm_dn: - bclrb #sign_bit,LOCAL_EX(%a0) |take off false sign - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs xdep - bsetb #sign_bit,LOCAL_EX(%a0) -xdep: - bfclr STAG(%a6){#5:#3} |clear wbtm66,wbtm1,wbtm0 - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - bclrb #sticky_bit,STICKY(%a6) |clear sticky bit - bclrb #E1,E_BYTE(%a6) - movel (%a7)+,%a0 -xdnrm_dis: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |get round precision - bnes not_ext |if not round extended, store -| ;IEEE defaults -is_ext: - btstb #sign_bit,LOCAL_EX(%a0) - beqs xdnrm_store - - bsetb #neg_bit,FPSR_CC(%a6) |set N bit in FPSR_CC - - bras xdnrm_store - -not_ext: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bsr unf_sub |returns IEEE result pointed by -| ;a0; sets FPSR_CC accordingly - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs xdnrm_store - bsetb #sign_bit,LOCAL_EX(%a0) -xdnrm_store: - fmovemx (%a0),%fp0-%fp0 |store result in fp0 - rts - -| -| This subroutine is used for dyadic operations that use an extended -| denorm within the kernel. The approach used is to capture the frame, -| fix/restore. -| - .global t_avoid_unsupp -t_avoid_unsupp: - link %a2,#-LOCAL_SIZE |so that a2 fpsp.h negative -| ;offsets may be used - fsave -(%a7) - tstb 1(%a7) |check if idle, exit if so - beq idle_end - btstb #E1,E_BYTE(%a2) |check for an E1 exception if -| ;enabled, there is an unsupp - beq end_avun |else, exit - btstb #7,DTAG(%a2) |check for denorm destination - beqs src_den |else, must be a source denorm -| -| handle destination denorm -| - lea FPTEMP(%a2),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bclrb #7,DTAG(%a2) |set DTAG to norm - bsr nrm_set |normalize result, exponent -| ;will become negative - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs ck_src_den |check if source is also denorm - bsetb #sign_bit,LOCAL_EX(%a0) -ck_src_den: - btstb #7,STAG(%a2) - beqs end_avun -src_den: - lea ETEMP(%a2),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext format - bclrb #7,STAG(%a2) |set STAG to norm - bsr nrm_set |normalize result, exponent -| ;will become negative - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of fake sign - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs den_com - bsetb #sign_bit,LOCAL_EX(%a0) -den_com: - moveb #0xfe,CU_SAVEPC(%a2) |set continue frame - clrw NMNEXC(%a2) |clear NMNEXC - bclrb #E1,E_BYTE(%a2) -| fmove.l %FPSR,FPSR_SHADOW(%a2) -| bset.b #SFLAG,E_BYTE(%a2) -| bset.b #XFLAG,T_BYTE(%a2) -end_avun: - frestore (%a7)+ - unlk %a2 - rts -idle_end: - addl #4,%a7 - unlk %a2 - rts - |end diff --git a/arch/m68k/fpsp040/res_func.S b/arch/m68k/fpsp040/res_func.S deleted file mode 100644 index d9cdf4383545b7aa21dd8b5d6976f38890611e68..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/res_func.S +++ /dev/null @@ -1,2039 +0,0 @@ -| -| res_func.sa 3.9 7/29/91 -| -| Normalizes denormalized numbers if necessary and updates the -| stack frame. 
The function is then restored back into the
-| machine and the 040 completes the operation. This routine
-| is only used by the unsupported data type/format handler.
-| (Exception vector 55).
-|
-| For packed move out (fmove.p fpm,<ea>) the operation is
-| completed here; data is packed and moved to user memory.
-| The stack is restored to the 040 only in the case of a
-| reportable exception in the conversion.
-|
-|
-| Copyright (C) Motorola, Inc. 1990
-| All Rights Reserved
-|
-| For details on the license for this file, please see the
-| file, README, in this same directory.
-
-RES_FUNC: |idnt 2,1 | Motorola 040 Floating Point Software Package
-
- |section 8
-
-#include "fpsp.h"
-
-sp_bnds: .short 0x3f81,0x407e
- .short 0x3f6a,0x0000
-dp_bnds: .short 0x3c01,0x43fe
- .short 0x3bcd,0x0000
-
- |xref mem_write
- |xref bindec
- |xref get_fline
- |xref round
- |xref denorm
- |xref dest_ext
- |xref dest_dbl
- |xref dest_sgl
- |xref unf_sub
- |xref nrm_set
- |xref dnrm_lp
- |xref ovf_res
- |xref reg_dest
- |xref t_ovfl
- |xref t_unfl
-
- .global res_func
- .global p_move
-
-res_func:
- clrb DNRM_FLG(%a6)
- clrb RES_FLG(%a6)
- clrb CU_ONLY(%a6)
- tstb DY_MO_FLG(%a6)
- beqs monadic
-dyadic:
- btstb #7,DTAG(%a6) |if dop = norm=000, zero=001,
-| ;inf=010 or nan=011
- beqs monadic |then branch
-| ;else denorm
-| HANDLE DESTINATION DENORM HERE
-| ;set dtag to norm
-| ;write the tag & fpte15 to the fstack
- leal FPTEMP(%a6),%a0
-
- bclrb #sign_bit,LOCAL_EX(%a0)
- sne LOCAL_SGN(%a0)
-
- bsr nrm_set |normalize number (exp will go negative)
- bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign
- bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format
- beqs dpos
- bsetb #sign_bit,LOCAL_EX(%a0)
-dpos:
- bfclr DTAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0
- bsetb #4,DTAG(%a6) |set FPTE15
- orb #0x0f,DNRM_FLG(%a6)
-monadic:
- leal ETEMP(%a6),%a0
- btstb #direction_bit,CMDREG1B(%a6) |check direction
- bne opclass3 |it is a mv out
-|
-| At this point, only opclass 0 and 2 possible
-|
- btstb #7,STAG(%a6) |if sop = norm=000, zero=001,
-| ;inf=010 or nan=011
- bne mon_dnrm |else denorm
- tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would
- bne normal |require normalization of denorm
-
-| At this point:
-| monadic instructions: fabs = $18 fneg = $1a ftst = $3a
-| fmove = $00 fsmove = $40 fdmove = $44
-| fsqrt = $05* fssqrt = $41 fdsqrt = $45
-| (*fsqrt reencoded to $05)
-|
- movew CMDREG1B(%a6),%d0 |get command register
- andil #0x7f,%d0 |strip to only command word
-|
-| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
-| fdsqrt are possible.
-| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
-| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
-|
- btstl #0,%d0
- bne normal |weed out fsqrt instructions
-|
-| cu_norm handles fmove in instructions with normalized inputs.
-| The routine round is used to correctly round the input for the
-| destination precision and mode.
-|
-cu_norm:
- st CU_ONLY(%a6) |set cu-only inst flag
- movew CMDREG1B(%a6),%d0
- andib #0x3b,%d0 |isolate bits to select inst
- tstb %d0
- beql cu_nmove |if zero, it is an fmove
- cmpib #0x18,%d0
- beql cu_nabs |if $18, it is fabs
- cmpib #0x1a,%d0
- beql cu_nneg |if $1a, it is fneg
-|
-| Inst is ftst. Check the source operand and set the cc's accordingly.
-| No write is done, so simply rts.
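The monadic decode above reads as a small switch: after bit 0 weeds out the three square-root encodings ($05/$41/$45), which need a normalized source, the command bits are masked with $3b, where $00 selects fmove (also covering fsmove/fdmove), $18 fabs, $1a fneg, and anything else ftst. A C sketch with invented enum names:

    enum mon_op { OP_FMOVE, OP_FABS, OP_FNEG, OP_FTST, OP_FSQRT };

    /* cmd is the low 7 bits of CMDREG1B.  fsqrt/fssqrt/fdsqrt all carry
     * bit 0, so they are filtered first; the remaining cu-only
     * instructions are distinguished under the $3b mask. */
    enum mon_op decode_monadic(unsigned cmd)
    {
        if (cmd & 1)
            return OP_FSQRT;            /* must normalize the source first */
        switch (cmd & 0x3b) {
        case 0x00: return OP_FMOVE;     /* fmove/fsmove/fdmove */
        case 0x18: return OP_FABS;
        case 0x1a: return OP_FNEG;
        default:   return OP_FTST;
        }
    }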
-| -cu_ntst: - movew LOCAL_EX(%a0),%d0 - bclrl #15,%d0 - sne LOCAL_SGN(%a0) - beqs cu_ntpo - orl #neg_mask,USER_FPSR(%a6) |set N -cu_ntpo: - cmpiw #0x7fff,%d0 |test for inf/nan - bnes cu_ntcz - tstl LOCAL_HI(%a0) - bnes cu_ntn - tstl LOCAL_LO(%a0) - bnes cu_ntn - orl #inf_mask,USER_FPSR(%a6) - rts -cu_ntn: - orl #nan_mask,USER_FPSR(%a6) - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - - rts -cu_ntcz: - tstl LOCAL_HI(%a0) - bnel cu_ntsx - tstl LOCAL_LO(%a0) - bnel cu_ntsx - orl #z_mask,USER_FPSR(%a6) -cu_ntsx: - rts -| -| Inst is fabs. Execute the absolute value function on the input. -| Branch to the fmove code. If the operand is NaN, do nothing. -| -cu_nabs: - moveb STAG(%a6),%d0 - btstl #5,%d0 |test for NaN or zero - bne wr_etemp |if either, simply write it - bclrb #7,LOCAL_EX(%a0) |do abs - bras cu_nmove |fmove code will finish -| -| Inst is fneg. Execute the negate value function on the input. -| Fall though to the fmove code. If the operand is NaN, do nothing. -| -cu_nneg: - moveb STAG(%a6),%d0 - btstl #5,%d0 |test for NaN or zero - bne wr_etemp |if either, simply write it - bchgb #7,LOCAL_EX(%a0) |do neg -| -| Inst is fmove. This code also handles all result writes. -| If bit 2 is set, round is forced to double. If it is clear, -| and bit 6 is set, round is forced to single. If both are clear, -| the round precision is found in the fpcr. If the rounding precision -| is double or single, round the result before the write. -| -cu_nmove: - moveb STAG(%a6),%d0 - andib #0xe0,%d0 |isolate stag bits - bne wr_etemp |if not norm, simply write it - btstb #2,CMDREG1B+1(%a6) |check for rd - bne cu_nmrd - btstb #6,CMDREG1B+1(%a6) |check for rs - bne cu_nmrs -| -| The move or operation is not with forced precision. Test for -| nan or inf as the input; if so, simply write it to FPn. Use the -| FPCR_MODE byte to get rounding on norms and zeros. -| -cu_nmnr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 - tstb %d0 |check for extended - beq cu_wrexn |if so, just write result - cmpib #1,%d0 |check for single - beq cu_nmrs |fall through to double -| -| The move is fdmove or round precision is double. -| -cu_nmrd: - movel #2,%d0 |set up the size for denorm - movew LOCAL_EX(%a0),%d1 |compare exponent to double threshold - andw #0x7fff,%d1 - cmpw #0x3c01,%d1 - bls cu_nunfl - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - orl #0x00020000,%d1 |or in rprec (double) - clrl %d0 |clear g,r,s for round - bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal format - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nmrdc - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nmrdc: - movew LOCAL_EX(%a0),%d1 |check for overflow - andw #0x7fff,%d1 - cmpw #0x43ff,%d1 - bge cu_novfl |take care of overflow case - bra cu_wrexn -| -| The move is fsmove or round precision is single. -| -cu_nmrs: - movel #1,%d0 - movew LOCAL_EX(%a0),%d1 - andw #0x7fff,%d1 - cmpw #0x3f81,%d1 - bls cu_nunfl - bfextu FPCR_MODE(%a6){#2:#2},%d1 - orl #0x00010000,%d1 - clrl %d0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nmrsc - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nmrsc: - movew LOCAL_EX(%a0),%d1 - andw #0x7FFF,%d1 - cmpw #0x407f,%d1 - blt cu_wrexn -| -| The operand is above precision boundaries. Use t_ovfl to -| generate the correct value. -| -cu_novfl: - bsr t_ovfl - bra cu_wrexn -| -| The operand is below precision boundaries. Use denorm to -| generate the correct value. 
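The precision boundaries used by cu_nmrs/cu_nmrd above come straight from sp_bnds/dp_bnds: a single result is a normal only for extended biased exponents in [$3f81, $407e] and a double only in [$3c01, $43fe]; the code routes exponents at or below the low bound to the denorm path and rounded exponents past the high bound to overflow. As a sketch (fits_as is an invented name):

    #include <stdint.h>

    enum fit { FIT_OK, FIT_UNDERFLOW, FIT_OVERFLOW };

    /* Extended-format biased exponent ranges of the narrower normals:
     * single [0x3f81, 0x407e], double [0x3c01, 0x43fe]. */
    enum fit fits_as(int prec_single, uint16_t biased_exp)
    {
        uint16_t lo = prec_single ? 0x3f81 : 0x3c01;
        uint16_t hi = prec_single ? 0x407e : 0x43fe;
        if (biased_exp <= lo) return FIT_UNDERFLOW;  /* denormalize */
        if (biased_exp > hi)  return FIT_OVERFLOW;   /* t_ovfl / ovf_res */
        return FIT_OK;
    }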
-| -cu_nunfl: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsr denorm - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs cu_nucont - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nucont: - bfextu FPCR_MODE(%a6){#2:#2},%d1 - btstb #2,CMDREG1B+1(%a6) |check for rd - bne inst_d - btstb #6,CMDREG1B+1(%a6) |check for rs - bne inst_s - swap %d1 - moveb FPCR_MODE(%a6),%d1 - lsrb #6,%d1 - swap %d1 - bra inst_sd -inst_d: - orl #0x00020000,%d1 - bra inst_sd -inst_s: - orl #0x00010000,%d1 -inst_sd: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsrl round - bfclr LOCAL_SGN(%a0){#0:#8} - beqs cu_nuflp - bsetb #sign_bit,LOCAL_EX(%a0) -cu_nuflp: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs cu_nuninx - orl #aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL -cu_nuninx: - tstl LOCAL_HI(%a0) |test for zero - bnes cu_nunzro - tstl LOCAL_LO(%a0) - bnes cu_nunzro -| -| The mantissa is zero from the denorm loop. Check sign and rmode -| to see if rounding should have occurred which would leave the lsb. -| - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 |isolate rmode - cmpil #0x20,%d0 - blts cu_nzro - bnes cu_nrp -cu_nrm: - tstw LOCAL_EX(%a0) |if positive, set lsb - bges cu_nzro - btstb #7,FPCR_MODE(%a6) |check for double - beqs cu_nincs - bras cu_nincd -cu_nrp: - tstw LOCAL_EX(%a0) |if positive, set lsb - blts cu_nzro - btstb #7,FPCR_MODE(%a6) |check for double - beqs cu_nincs -cu_nincd: - orl #0x800,LOCAL_LO(%a0) |inc for double - bra cu_nunzro -cu_nincs: - orl #0x100,LOCAL_HI(%a0) |inc for single - bra cu_nunzro -cu_nzro: - orl #z_mask,USER_FPSR(%a6) - moveb STAG(%a6),%d0 - andib #0xe0,%d0 - cmpib #0x40,%d0 |check if input was tagged zero - beqs cu_numv -cu_nunzro: - orl #unfl_mask,USER_FPSR(%a6) |set unfl -cu_numv: - movel (%a0),ETEMP(%a6) - movel 4(%a0),ETEMP_HI(%a6) - movel 8(%a0),ETEMP_LO(%a6) -| -| Write the result to memory, setting the fpsr cc bits. NaN and Inf -| bypass cu_wrexn. -| -cu_wrexn: - tstw LOCAL_EX(%a0) |test for zero - beqs cu_wrzero - cmpw #0x8000,LOCAL_EX(%a0) |test for zero - bnes cu_wreon -cu_wrzero: - orl #z_mask,USER_FPSR(%a6) |set Z bit -cu_wreon: - tstw LOCAL_EX(%a0) - bpl wr_etemp - orl #neg_mask,USER_FPSR(%a6) - bra wr_etemp - -| -| HANDLE SOURCE DENORM HERE -| -| ;clear denorm stag to norm -| ;write the new tag & ete15 to the fstack -mon_dnrm: -| -| At this point, check for the cases in which normalizing the -| denorm produces incorrect results. -| - tstb DY_MO_FLG(%a6) |all cases of dyadic instructions would - bnes nrm_src |require normalization of denorm - -| At this point: -| monadic instructions: fabs = $18 fneg = $1a ftst = $3a -| fmove = $00 fsmove = $40 fdmove = $44 -| fsqrt = $05* fssqrt = $41 fdsqrt = $45 -| (*fsqrt reencoded to $05) -| - movew CMDREG1B(%a6),%d0 |get command register - andil #0x7f,%d0 |strip to only command word -| -| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and -| fdsqrt are possible. 
-| For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize) -| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) -| - btstl #0,%d0 - bnes nrm_src |weed out fsqrt instructions - st CU_ONLY(%a6) |set cu-only inst flag - bra cu_dnrm |fmove, fabs, fneg, ftst -| ;cases go to cu_dnrm -nrm_src: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - bsr nrm_set |normalize number (exponent will go -| ; negative) - bclrb #sign_bit,LOCAL_EX(%a0) |get rid of false sign - - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs spos - bsetb #sign_bit,LOCAL_EX(%a0) -spos: - bfclr STAG(%a6){#0:#4} |set tag to normalized, FPTE15 = 0 - bsetb #4,STAG(%a6) |set ETE15 - orb #0xf0,DNRM_FLG(%a6) -normal: - tstb DNRM_FLG(%a6) |check if any of the ops were denorms - bne ck_wrap |if so, check if it is a potential -| ;wrap-around case -fix_stk: - moveb #0xfe,CU_SAVEPC(%a6) - bclrb #E1,E_BYTE(%a6) - - clrw NMNEXC(%a6) - - st RES_FLG(%a6) |indicate that a restore is needed - rts - -| -| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and -| ftst) completely in software without an frestore to the 040. -| -cu_dnrm: - st CU_ONLY(%a6) - movew CMDREG1B(%a6),%d0 - andib #0x3b,%d0 |isolate bits to select inst - tstb %d0 - beql cu_dmove |if zero, it is an fmove - cmpib #0x18,%d0 - beql cu_dabs |if $18, it is fabs - cmpib #0x1a,%d0 - beql cu_dneg |if $1a, it is fneg -| -| Inst is ftst. Check the source operand and set the cc's accordingly. -| No write is done, so simply rts. -| -cu_dtst: - movew LOCAL_EX(%a0),%d0 - bclrl #15,%d0 - sne LOCAL_SGN(%a0) - beqs cu_dtpo - orl #neg_mask,USER_FPSR(%a6) |set N -cu_dtpo: - cmpiw #0x7fff,%d0 |test for inf/nan - bnes cu_dtcz - tstl LOCAL_HI(%a0) - bnes cu_dtn - tstl LOCAL_LO(%a0) - bnes cu_dtn - orl #inf_mask,USER_FPSR(%a6) - rts -cu_dtn: - orl #nan_mask,USER_FPSR(%a6) - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - rts -cu_dtcz: - tstl LOCAL_HI(%a0) - bnel cu_dtsx - tstl LOCAL_LO(%a0) - bnel cu_dtsx - orl #z_mask,USER_FPSR(%a6) -cu_dtsx: - rts -| -| Inst is fabs. Execute the absolute value function on the input. -| Branch to the fmove code. -| -cu_dabs: - bclrb #7,LOCAL_EX(%a0) |do abs - bras cu_dmove |fmove code will finish -| -| Inst is fneg. Execute the negate value function on the input. -| Fall though to the fmove code. -| -cu_dneg: - bchgb #7,LOCAL_EX(%a0) |do neg -| -| Inst is fmove. This code also handles all result writes. -| If bit 2 is set, round is forced to double. If it is clear, -| and bit 6 is set, round is forced to single. If both are clear, -| the round precision is found in the fpcr. If the rounding precision -| is double or single, the result is zero, and the mode is checked -| to determine if the lsb of the result should be set. -| -cu_dmove: - btstb #2,CMDREG1B+1(%a6) |check for rd - bne cu_dmrd - btstb #6,CMDREG1B+1(%a6) |check for rs - bne cu_dmrs -| -| The move or operation is not with forced precision. Use the -| FPCR_MODE byte to get rounding. -| -cu_dmnr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 - tstb %d0 |check for extended - beq cu_wrexd |if so, just write result - cmpib #1,%d0 |check for single - beq cu_dmrs |fall through to double -| -| The move is fdmove or round precision is double. Result is zero. -| Check rmode for rp or rm and set lsb accordingly. 
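That zero-versus-lsb choice, used by cu_dmrd here and by cu_dmrs below, depends only on the result's sign and the rounding direction: round-to-plus-infinity on a positive source, or round-to-minus-infinity on a negative one, must deliver the target format's smallest denorm instead of a clean zero. A sketch, with rmode encoded as in FPCR bits 5-4 (0=RN, 1=RZ, 2=RM, 3=RP) and an invented function name:

    /* Magnitude that replaces an underflowed-to-zero result. */
    double underflow_result(int negative, int rmode, double smallest_denorm)
    {
        if (!negative && rmode == 3)   /* RP on a positive source: +lsb */
            return smallest_denorm;
        if (negative && rmode == 2)    /* RM on a negative source: -lsb */
            return -smallest_denorm;
        return negative ? -0.0 : 0.0;  /* otherwise a signed zero */
    }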
-| -cu_dmrd: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - tstw LOCAL_EX(%a0) |check sign - blts cu_dmdn - cmpib #3,%d1 |check for rp - bne cu_dpd |load double pos zero - bra cu_dpdr |load double pos zero w/lsb -cu_dmdn: - cmpib #2,%d1 |check for rm - bne cu_dnd |load double neg zero - bra cu_dndr |load double neg zero w/lsb -| -| The move is fsmove or round precision is single. Result is zero. -| Check for rp or rm and set lsb accordingly. -| -cu_dmrs: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rmode - tstw LOCAL_EX(%a0) |check sign - blts cu_dmsn - cmpib #3,%d1 |check for rp - bne cu_spd |load single pos zero - bra cu_spdr |load single pos zero w/lsb -cu_dmsn: - cmpib #2,%d1 |check for rm - bne cu_snd |load single neg zero - bra cu_sndr |load single neg zero w/lsb -| -| The precision is extended, so the result in etemp is correct. -| Simply set unfl (not inex2 or aunfl) and write the result to -| the correct fp register. -cu_wrexd: - orl #unfl_mask,USER_FPSR(%a6) - tstw LOCAL_EX(%a0) - beq wr_etemp - orl #neg_mask,USER_FPSR(%a6) - bra wr_etemp -| -| These routines write +/- zero in double format. The routines -| cu_dpdr and cu_dndr set the double lsb. -| -cu_dpd: - movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dpdr: - movel #0x3c010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - movel #0x800,LOCAL_LO(%a0) |with lsb set - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dnd: - movel #0xbc010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_dndr: - movel #0xbc010000,LOCAL_EX(%a0) |force pos double zero - clrl LOCAL_HI(%a0) - movel #0x800,LOCAL_LO(%a0) |with lsb set - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -| -| These routines write +/- zero in single format. The routines -| cu_dpdr and cu_dndr set the single lsb. -| -cu_spd: - movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_spdr: - movel #0x3f810000,LOCAL_EX(%a0) |force pos single zero - movel #0x100,LOCAL_HI(%a0) |with lsb set - clrl LOCAL_LO(%a0) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_snd: - movel #0xbf810000,LOCAL_EX(%a0) |force pos single zero - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - orl #z_mask,USER_FPSR(%a6) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp -cu_sndr: - movel #0xbf810000,LOCAL_EX(%a0) |force pos single zero - movel #0x100,LOCAL_HI(%a0) |with lsb set - clrl LOCAL_LO(%a0) - orl #neg_mask,USER_FPSR(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - bra wr_etemp - -| -| This code checks for 16-bit overflow conditions on dyadic -| operations which are not restorable into the floating-point -| unit and must be completed in software. Basically, this -| condition exists with a very large norm and a denorm. One -| of the operands must be denormalized to enter this code. 
-| -| Flags used: -| DY_MO_FLG contains 0 for monadic op, $ff for dyadic -| DNRM_FLG contains $00 for neither op denormalized -| $0f for the destination op denormalized -| $f0 for the source op denormalized -| $ff for both ops denormalized -| -| The wrap-around condition occurs for add, sub, div, and cmp -| when -| -| abs(dest_exp - src_exp) >= $8000 -| -| and for mul when -| -| (dest_exp + src_exp) < $0 -| -| we must process the operation here if this case is true. -| -| The rts following the frcfpn routine is the exit from res_func -| for this condition. The restore flag (RES_FLG) is left clear. -| No frestore is done unless an exception is to be reported. -| -| For fadd: -| if(sign_of(dest) != sign_of(src)) -| replace exponent of src with $3fff (keep sign) -| use fpu to perform dest+new_src (user's rmode and X) -| clr sticky -| else -| set sticky -| call round with user's precision and mode -| move result to fpn and wbtemp -| -| For fsub: -| if(sign_of(dest) == sign_of(src)) -| replace exponent of src with $3fff (keep sign) -| use fpu to perform dest+new_src (user's rmode and X) -| clr sticky -| else -| set sticky -| call round with user's precision and mode -| move result to fpn and wbtemp -| -| For fdiv/fsgldiv: -| if(both operands are denorm) -| restore_to_fpu; -| if(dest is norm) -| force_ovf; -| else(dest is denorm) -| force_unf: -| -| For fcmp: -| if(dest is norm) -| N = sign_of(dest); -| else(dest is denorm) -| N = sign_of(src); -| -| For fmul: -| if(both operands are denorm) -| force_unf; -| if((dest_exp + src_exp) < 0) -| force_unf: -| else -| restore_to_fpu; -| -| local equates: - .set addcode,0x22 - .set subcode,0x28 - .set mulcode,0x23 - .set divcode,0x20 - .set cmpcode,0x38 -ck_wrap: - | tstb DY_MO_FLG(%a6) ;check for fsqrt - beq fix_stk |if zero, it is fsqrt - movew CMDREG1B(%a6),%d0 - andiw #0x3b,%d0 |strip to command bits - cmpiw #addcode,%d0 - beq wrap_add - cmpiw #subcode,%d0 - beq wrap_sub - cmpiw #mulcode,%d0 - beq wrap_mul - cmpiw #cmpcode,%d0 - beq wrap_cmp -| -| Inst is fdiv. -| -wrap_div: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and force the result. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes div_srcd -div_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x7fff,%d0 - blt fix_stk |if less, not wrap case - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beq force_unf - st WBTEMP_SGN(%a6) - bra force_unf - -ckinf_ns: - moveb STAG(%a6),%d0 |check source tag for inf or nan - bra ck_in_com -ckinf_nd: - moveb DTAG(%a6),%d0 |check destination tag for inf or nan -ck_in_com: - andib #0x60,%d0 |isolate tag bits - cmpb #0x40,%d0 |is it inf? - beq nan_or_inf |not wrap case - cmpb #0x60,%d0 |is it nan? - beq nan_or_inf |yes, not wrap case? - cmpb #0x20,%d0 |is it a zero? 
- beq nan_or_inf |yes - clrl %d0 - rts |then ; it is either a zero of norm, -| ;check wrap case -nan_or_inf: - moveql #-1,%d0 - rts - - - -div_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beqs force_ovf - st WBTEMP_SGN(%a6) -| -| This code handles the case of the instruction resulting in -| an overflow condition. -| -force_ovf: - bclrb #E1,E_BYTE(%a6) - orl #ovfl_inx_mask,USER_FPSR(%a6) - clrw NMNEXC(%a6) - leal WBTEMP(%a6),%a0 |point a0 to memory location - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcovf_fpcr - btstl #2,%d0 |check for double - bnes frcovf_dbl - movel #0x1,%d0 |inst is forced single - bras frcovf_rnd -frcovf_dbl: - movel #0x2,%d0 |inst is forced double - bras frcovf_rnd -frcovf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec -frcovf_rnd: - -| The 881/882 does not set inex2 for the following case, so the -| line is commented out to be compatible with 881/882 -| tst.b %d0 -| beq.b frcovf_x -| or.l #inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2 - -|frcovf_x: - bsrl ovf_res |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly -| ;returns in external format - bfclr WBTEMP_SGN(%a6){#0:#8} - beq frcfpn - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpn -| -| Inst is fadd. -| -wrap_add: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes add_srcd -add_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - bra add_wrap -add_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case -| -| Check the signs of the operands. If they are unlike, the fpu -| can be used to add the norm and 1.0 with the sign of the -| denorm and it will correctly generate the result in extended -| precision. We can then call round with no sticky and the result -| will be correct for the user's rounding mode and precision. If -| the signs are the same, we call round with the sticky bit set -| and the result will be correct for the user's rounding mode and -| precision. -| -add_wrap: - movew ETEMP_EX(%a6),%d0 - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beq add_same -| -| The signs are unlike. -| - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes add_u_srcd - movew FPTEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,FPTEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex ETEMP(%a6),%fp0 - faddx FPTEMP(%a6),%fp0 - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -add_u_srcd: - movew ETEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,ETEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex ETEMP(%a6),%fp0 - faddx FPTEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) |use internal format for round - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -| -| Signs are alike: -| -add_same: - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes add_s_srcd -add_s_destd: - leal ETEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs add_s_dclr - bsetb #sign_bit,ETEMP_EX(%a6) -add_s_dclr: - leal WBTEMP(%a6),%a0 - movel ETEMP(%a6),(%a0) |write result to wbtemp - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - tstw ETEMP_EX(%a6) - bgt add_ckovf - orl #neg_mask,USER_FPSR(%a6) - bra add_ckovf -add_s_srcd: - leal FPTEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,FPTEMP_EX(%a6) - sne FPTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs add_s_sclr - bsetb #sign_bit,FPTEMP_EX(%a6) -add_s_sclr: - leal WBTEMP(%a6),%a0 - movel FPTEMP(%a6),(%a0) |write result to wbtemp - movel FPTEMP_HI(%a6),4(%a0) - movel FPTEMP_LO(%a6),8(%a0) - tstw FPTEMP_EX(%a6) - bgt add_ckovf - orl #neg_mask,USER_FPSR(%a6) -add_ckovf: - movew WBTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x7fff,%d0 - bne frcfpnr -| -| The result has overflowed to $7fff exponent. Set I, ovfl, -| and aovfl, and clr the mantissa (incorrectly set by the -| round routine.) -| - orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) - clrl 4(%a0) - bra frcfpnr -| -| Inst is fsub. -| -wrap_sub: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes sub_srcd -sub_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - bra sub_wrap -sub_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case -| -| Check the signs of the operands. If they are alike, the fpu -| can be used to subtract from the norm 1.0 with the sign of the -| denorm and it will correctly generate the result in extended -| precision. We can then call round with no sticky and the result -| will be correct for the user's rounding mode and precision. If -| the signs are unlike, we call round with the sticky bit set -| and the result will be correct for the user's rounding mode and -| precision. -| -sub_wrap: - movew ETEMP_EX(%a6),%d0 - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - bne sub_diff -| -| The signs are alike. -| - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? 
- bnes sub_u_srcd - movew FPTEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,FPTEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex FPTEMP(%a6),%fp0 - fsubx ETEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -sub_u_srcd: - movew ETEMP_EX(%a6),%d0 - andiw #0x8000,%d0 - orw #0x3fff,%d0 |force the exponent to +/- 1 - movew %d0,ETEMP_EX(%a6) |in the denorm - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - fmovel %d0,%fpcr |set up users rmode and X - fmovex FPTEMP(%a6),%fp0 - fsubx ETEMP(%a6),%fp0 - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture cc's and inex from fadd - leal WBTEMP(%a6),%a0 |point a0 to wbtemp in frame - fmovex %fp0,WBTEMP(%a6) |write result to memory - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - clrl %d0 |force sticky to zero - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beq frcfpnr - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpnr -| -| Signs are unlike: -| -sub_diff: - cmpb #0x0f,DNRM_FLG(%a6) |is dest the denorm? - bnes sub_s_srcd -sub_s_destd: - leal ETEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round -| -| Since the dest is the denorm, the sign is the opposite of the -| norm sign. 
-| - eoriw #0x8000,ETEMP_EX(%a6) |flip sign on result - tstw ETEMP_EX(%a6) - bgts sub_s_dwr - orl #neg_mask,USER_FPSR(%a6) -sub_s_dwr: - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr ETEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs sub_s_dclr - bsetb #sign_bit,ETEMP_EX(%a6) -sub_s_dclr: - leal WBTEMP(%a6),%a0 - movel ETEMP(%a6),(%a0) |write result to wbtemp - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bra sub_ckovf -sub_s_srcd: - leal FPTEMP(%a6),%a0 - movel USER_FPCR(%a6),%d0 - andil #0x30,%d0 - lsrl #4,%d0 |put rmode in lower 2 bits - movel USER_FPCR(%a6),%d1 - andil #0xc0,%d1 - lsrl #6,%d1 |put precision in upper word - swap %d1 - orl %d0,%d1 |set up for round call - movel #0x20000000,%d0 |set sticky for round - bclrb #sign_bit,FPTEMP_EX(%a6) - sne FPTEMP_SGN(%a6) - bsrl round |round result to users rmode & prec - bfclr FPTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs sub_s_sclr - bsetb #sign_bit,FPTEMP_EX(%a6) -sub_s_sclr: - leal WBTEMP(%a6),%a0 - movel FPTEMP(%a6),(%a0) |write result to wbtemp - movel FPTEMP_HI(%a6),4(%a0) - movel FPTEMP_LO(%a6),8(%a0) - tstw FPTEMP_EX(%a6) - bgt sub_ckovf - orl #neg_mask,USER_FPSR(%a6) -sub_ckovf: - movew WBTEMP_EX(%a6),%d0 - andiw #0x7fff,%d0 - cmpiw #0x7fff,%d0 - bne frcfpnr -| -| The result has overflowed to $7fff exponent. Set I, ovfl, -| and aovfl, and clr the mantissa (incorrectly set by the -| round routine.) -| - orl #inf_mask+ovfl_inx_mask,USER_FPSR(%a6) - clrl 4(%a0) - bra frcfpnr -| -| Inst is fcmp. -| -wrap_cmp: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq fix_stk |restore to fpu -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes cmp_srcd -cmp_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - subl %d1,%d0 |subtract dest from src - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - tstw ETEMP_EX(%a6) |set N to ~sign_of(src) - bge cmp_setn - rts -cmp_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - subl %d1,%d0 |subtract src from dest - cmpl #0x8000,%d0 - blt fix_stk |if less, not wrap case - tstw FPTEMP_EX(%a6) |set N to sign_of(dest) - blt cmp_setn - rts -cmp_setn: - orl #neg_mask,USER_FPSR(%a6) - rts - -| -| Inst is fmul. -| -wrap_mul: - cmpb #0xff,DNRM_FLG(%a6) |if both ops denorm, - beq force_unf |force an underflow (really!) -| -| One of the ops is denormalized. Test for wrap condition -| and complete the instruction. -| - cmpb #0x0f,DNRM_FLG(%a6) |check for dest denorm - bnes mul_srcd -mul_destd: - bsrl ckinf_ns - bne fix_stk - bfextu ETEMP_EX(%a6){#1:#15},%d0 |get src exp (always pos) - bfexts FPTEMP_EX(%a6){#1:#15},%d1 |get dest exp (always neg) - addl %d1,%d0 |subtract dest from src - bgt fix_stk - bra force_unf -mul_srcd: - bsrl ckinf_nd - bne fix_stk - bfextu FPTEMP_EX(%a6){#1:#15},%d0 |get dest exp (always pos) - bfexts ETEMP_EX(%a6){#1:#15},%d1 |get src exp (always neg) - addl %d1,%d0 |subtract src from dest - bgt fix_stk - -| -| This code handles the case of the instruction resulting in -| an underflow condition. 
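The wrap-around tests described earlier and the sign selection used by force_ovf/force_unf below reduce to a few lines of integer arithmetic: add/sub/cmp wrap when the exponent difference reaches $8000 (the two divide paths cut at $7fff or $8000 depending on which operand is the denorm), multiply is forced to underflow when the exponent sum is not positive, and the sign is the XOR of the operand signs. A C restatement, with the denorm's exponent sign-extended as the bfexts in the code does (function names invented):

    #include <stdint.h>

    /* One operand is a normalized extended denorm, so its 15-bit exponent
     * is read sign-extended (negative); the normal operand's is read
     * unsigned.  Differences of 0x8000 or more no longer fit the 040
     * frame, so the FPSP finishes those operations itself. */
    int addsub_wraps(int32_t norm_exp, int32_t denorm_exp)
    {
        return (norm_exp - denorm_exp) >= 0x8000;
    }

    int mul_wraps(int32_t norm_exp, int32_t denorm_exp)
    {
        return (norm_exp + denorm_exp) <= 0;   /* forced underflow when true */
    }

    /* force_ovf/force_unf pick the result sign the way mul/div hardware
     * would: the XOR of the operand sign bits. */
    int result_is_negative(uint16_t src_sign_exp, uint16_t dst_sign_exp)
    {
        return ((src_sign_exp ^ dst_sign_exp) & 0x8000) != 0;
    }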
-| -force_unf: - bclrb #E1,E_BYTE(%a6) - orl #unfinx_mask,USER_FPSR(%a6) - clrw NMNEXC(%a6) - clrb WBTEMP_SGN(%a6) - movew ETEMP_EX(%a6),%d0 |find the sign of the result - movew FPTEMP_EX(%a6),%d1 - eorw %d1,%d0 - andiw #0x8000,%d0 - beqs frcunfcont - st WBTEMP_SGN(%a6) -frcunfcont: - lea WBTEMP(%a6),%a0 |point a0 to memory location - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcunf_fpcr - btstl #2,%d0 |check for double - bnes frcunf_dbl - movel #0x1,%d0 |inst is forced single - bras frcunf_rnd -frcunf_dbl: - movel #0x2,%d0 |inst is forced double - bras frcunf_rnd -frcunf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec -frcunf_rnd: - bsrl unf_sub |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs frcfpn - bsetb #sign_bit,WBTEMP_EX(%a6) - bra frcfpn - -| -| Write the result to the user's fpn. All results must be HUGE to be -| written; otherwise the results would have overflowed or underflowed. -| If the rounding precision is single or double, the ovf_res routine -| is needed to correctly supply the max value. -| -frcfpnr: - movew CMDREG1B(%a6),%d0 - btstl #6,%d0 |test for forced precision - beqs frcfpn_fpcr - btstl #2,%d0 |check for double - bnes frcfpn_dbl - movel #0x1,%d0 |inst is forced single - bras frcfpn_rnd -frcfpn_dbl: - movel #0x2,%d0 |inst is forced double - bras frcfpn_rnd -frcfpn_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |inst not forced - use fpcr prec - tstb %d0 - beqs frcfpn |if extended, write what you got -frcfpn_rnd: - bclrb #sign_bit,WBTEMP_EX(%a6) - sne WBTEMP_SGN(%a6) - bsrl ovf_res |get correct result based on -| ;round precision/mode. This -| ;sets FPSR_CC correctly - bfclr WBTEMP_SGN(%a6){#0:#8} |convert back to IEEE ext format - beqs frcfpn_clr - bsetb #sign_bit,WBTEMP_EX(%a6) -frcfpn_clr: - orl #ovfinx_mask,USER_FPSR(%a6) -| -| Perform the write. -| -frcfpn: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register - cmpib #3,%d0 - bles frc0123 |check if dest is fp0-fp3 - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx WBTEMP(%a6),%d0 - rts -frc0123: - cmpib #0,%d0 - beqs frc0_dst - cmpib #1,%d0 - beqs frc1_dst - cmpib #2,%d0 - beqs frc2_dst -frc3_dst: - movel WBTEMP_EX(%a6),USER_FP3(%a6) - movel WBTEMP_HI(%a6),USER_FP3+4(%a6) - movel WBTEMP_LO(%a6),USER_FP3+8(%a6) - rts -frc2_dst: - movel WBTEMP_EX(%a6),USER_FP2(%a6) - movel WBTEMP_HI(%a6),USER_FP2+4(%a6) - movel WBTEMP_LO(%a6),USER_FP2+8(%a6) - rts -frc1_dst: - movel WBTEMP_EX(%a6),USER_FP1(%a6) - movel WBTEMP_HI(%a6),USER_FP1+4(%a6) - movel WBTEMP_LO(%a6),USER_FP1+8(%a6) - rts -frc0_dst: - movel WBTEMP_EX(%a6),USER_FP0(%a6) - movel WBTEMP_HI(%a6),USER_FP0+4(%a6) - movel WBTEMP_LO(%a6),USER_FP0+8(%a6) - rts - -| -| Write etemp to fpn. -| A check is made on enabled and signalled snan exceptions, -| and the destination is not overwritten if this condition exists. -| This code is designed to make fmoveins of unsupported data types -| faster. 
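frcfpn's register write-back above builds a dynamic fmovem mask for fp4-fp7: with fp0 at mask bit 7 under the addressing mode used, the one-hot bit is 7 minus the register number, while fp0-fp3 are stored directly into the saved-register area of the frame. The mask computation in C:

    #include <stdint.h>

    /* Mirrors "movel #7,%d1; subl %d0,%d1; bsetl %d1,%d0":
     * mask bit = 7 - register number. */
    uint8_t fmovem_mask(unsigned fpreg)
    {
        return (uint8_t)(1u << (7u - (fpreg & 7u)));
    }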
-| -wr_etemp: - btstb #snan_bit,FPSR_EXCEPT(%a6) |if snan is set, and - beqs fmoveinc |enabled, force restore - btstb #snan_bit,FPCR_ENABLE(%a6) |and don't overwrite - beqs fmoveinc |the dest - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - tstb ETEMP(%a6) |check for negative - blts snan_neg - rts -snan_neg: - orl #neg_bit,USER_FPSR(%a6) |snan is negative; set N - rts -fmoveinc: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - moveb STAG(%a6),%d0 |check if stag is inf - andib #0xe0,%d0 - cmpib #0x40,%d0 - bnes fminc_cnan - orl #inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) - bra fminc_con -fminc_cnan: - cmpib #0x60,%d0 |check if stag is NaN - bnes fminc_czero - orl #nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN - movel ETEMP_EX(%a6),FPTEMP_EX(%a6) |set up fptemp sign for -| ;snan handler - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) - bra fminc_con -fminc_czero: - cmpib #0x20,%d0 |check if zero - bnes fminc_con - orl #z_mask,USER_FPSR(%a6) |if zero, set Z - tstw LOCAL_EX(%a0) |check sign - bges fminc_con - orl #neg_mask,USER_FPSR(%a6) -fminc_con: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract fp destination register - cmpib #3,%d0 - bles fp0123 |check if dest is fp0-fp3 - movel #7,%d1 - subl %d0,%d1 - clrl %d0 - bsetl %d1,%d0 - fmovemx ETEMP(%a6),%d0 - rts - -fp0123: - cmpib #0,%d0 - beqs fp0_dst - cmpib #1,%d0 - beqs fp1_dst - cmpib #2,%d0 - beqs fp2_dst -fp3_dst: - movel ETEMP_EX(%a6),USER_FP3(%a6) - movel ETEMP_HI(%a6),USER_FP3+4(%a6) - movel ETEMP_LO(%a6),USER_FP3+8(%a6) - rts -fp2_dst: - movel ETEMP_EX(%a6),USER_FP2(%a6) - movel ETEMP_HI(%a6),USER_FP2+4(%a6) - movel ETEMP_LO(%a6),USER_FP2+8(%a6) - rts -fp1_dst: - movel ETEMP_EX(%a6),USER_FP1(%a6) - movel ETEMP_HI(%a6),USER_FP1+4(%a6) - movel ETEMP_LO(%a6),USER_FP1+8(%a6) - rts -fp0_dst: - movel ETEMP_EX(%a6),USER_FP0(%a6) - movel ETEMP_HI(%a6),USER_FP0+4(%a6) - movel ETEMP_LO(%a6),USER_FP0+8(%a6) - rts - -opclass3: - st CU_ONLY(%a6) - movew CMDREG1B(%a6),%d0 |check if packed moveout - andiw #0x0c00,%d0 |isolate last 2 bits of size field - cmpiw #0x0c00,%d0 |if size is 011 or 111, it is packed - beq pack_out |else it is norm or denorm - bra mv_out - - -| -| MOVE OUT -| - -mv_tbl: - .long li - .long sgp - .long xp - .long mvout_end |should never be taken - .long wi - .long dp - .long bi - .long mvout_end |should never be taken -mv_out: - bfextu CMDREG1B(%a6){#3:#3},%d1 |put source specifier in d1 - leal mv_tbl,%a0 - movel %a0@(%d1:l:4),%a0 - jmp (%a0) - -| -| This exit is for move-out to memory. The aunfl bit is -| set if the result is inex and unfl is signalled. -| -mvout_end: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs no_aufl - btstb #unfl_bit,FPSR_EXCEPT(%a6) - beqs no_aufl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_aufl: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - fmovel #0,%FPSR |clear any cc bits from res_func -| -| Return ETEMP to extended format from internal extended format so -| that gen_except will have a correctly signed value for ovfl/unfl -| handlers. -| - bfclr ETEMP_SGN(%a6){#0:#8} - beqs mvout_con - bsetb #sign_bit,ETEMP_EX(%a6) -mvout_con: - rts -| -| This exit is for move-out to int register. The aunfl bit is -| not set in any case for this move. 
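The integer move-out routines that follow (li, wi, bi) bracket the operand against the destination's representable range before letting the 040 convert: saturating values raise operr (with no inex2 on integer overflow), while boundary values that merely lose bits raise inexact. A simplified C sketch for the long case; the assembly cuts at the half-ulp points shown in its comments, and the final cast here stands in for the 040's conversion in the user's rounding mode:

    #include <stdint.h>

    #define FPSR_OPERR (1u << 13)   /* exception status bits per the manual */
    #define FPSR_INEX2 (1u << 9)

    int32_t to_int32(long double x, uint32_t *fpsr)
    {
        if (x >= 2147483647.0L) {                    /* at or above INT32_MAX */
            if (x > 2147483647.0L)
                *fpsr |= (x >= 2147483647.5L) ? FPSR_OPERR : FPSR_INEX2;
            return INT32_MAX;
        }
        if (x <= -2147483648.0L) {                   /* at or below INT32_MIN */
            if (x < -2147483648.0L)
                *fpsr |= (x < -2147483648.5L) ? FPSR_OPERR : FPSR_INEX2;
            return INT32_MIN;
        }
        return (int32_t)x;   /* in range: hardware applies the user's rmode */
    }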
-| -mvouti_end: - clrw NMNEXC(%a6) - bclrb #E1,E_BYTE(%a6) - fmovel #0,%FPSR |clear any cc bits from res_func -| -| Return ETEMP to extended format from internal extended format so -| that gen_except will have a correctly signed value for ovfl/unfl -| handlers. -| - bfclr ETEMP_SGN(%a6){#0:#8} - beqs mvouti_con - bsetb #sign_bit,ETEMP_EX(%a6) -mvouti_con: - rts -| -| li is used to handle a long integer source specifier -| - -li: - moveql #4,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |if so, branch - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmpd #0x41dfffffffc00000,%fp0 -| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec - fbge lo_plrg - fcmpd #0xc1e0000000000000,%fp0 -| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec - fble lo_nlrg -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmovel %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - - -lo_plrg: - movel #0x7fffffff,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmpd #0x41dfffffffe00000,%fp0 -| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -lo_nlrg: - movel #0x80000000,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmpd #0xc1e0000000100000,%fp0 -| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| wi is used to handle a word integer source specifier -| - -wi: - moveql #2,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |branch if so - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmps #0x46fffe00,%fp0 -| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec - fbge wo_plrg - fcmps #0xc7000000,%fp0 -| c7000000 in sgl prec = c00e00008000000000000000 in ext prec - fble wo_nlrg - -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmovew %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - -wo_plrg: - movew #0x7fff,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmps #0x46ffff00,%fp0 -| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -wo_nlrg: - movew #0x8000,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmps #0xc7000080,%fp0 -| c7000080 in sgl prec = c00e00008000800000000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| bi is used to handle a byte integer source specifier -| - -bi: - moveql #1,%d0 |set byte count - - btstb #7,STAG(%a6) |check for extended denorm - bne int_dnrm |branch if so - - fmovemx ETEMP(%a6),%fp0-%fp0 - fcmps #0x42fe0000,%fp0 -| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec - fbge by_plrg - fcmps #0xc3000000,%fp0 -| c3000000 in sgl prec = c00600008000000000000000 in ext prec - fble by_nlrg - -| -| at this point, the answer is between the largest pos and neg values -| - movel USER_FPCR(%a6),%d1 |use user's rounding mode - andil #0x30,%d1 - fmovel %d1,%fpcr - fmoveb %fp0,L_SCR1(%a6) |let the 040 perform conversion - fmovel %fpsr,%d1 - orl %d1,USER_FPSR(%a6) |capture inex2/ainex if set - bra int_wrt - 
-by_plrg: - moveb #0x7f,L_SCR1(%a6) |answer is largest positive int - fbeq int_wrt |exact answer - fcmps #0x42ff0000,%fp0 -| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec - fbge int_operr |set operr - bra int_inx |set inexact - -by_nlrg: - moveb #0x80,L_SCR1(%a6) - fbeq int_wrt |exact answer - fcmps #0xc3008000,%fp0 -| c3008000 in sgl prec = c00600008080000000000000 in ext prec - fblt int_operr |set operr - bra int_inx |set inexact - -| -| Common integer routines -| -| int_drnrm---account for possible nonzero result for round up with positive -| operand and round down for negative answer. In the first case (result = 1) -| byte-width (store in d0) of result must be honored. In the second case, -| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out). - -int_dnrm: - movel #0,L_SCR1(%a6) | initialize result to 0 - bfextu FPCR_MODE(%a6){#2:#2},%d1 | d1 is the rounding mode - cmpb #2,%d1 - bmis int_inx | if RN or RZ, done - bnes int_rp | if RP, continue below - tstw ETEMP(%a6) | RM: store -1 in L_SCR1 if src is negative - bpls int_inx | otherwise result is 0 - movel #-1,L_SCR1(%a6) - bras int_inx -int_rp: - tstw ETEMP(%a6) | RP: store +1 of proper width in L_SCR1 if -| ; source is greater than 0 - bmis int_inx | otherwise, result is 0 - lea L_SCR1(%a6),%a1 | a1 is address of L_SCR1 - addal %d0,%a1 | offset by destination width -1 - subal #1,%a1 - bsetb #0,(%a1) | set low bit at a1 address -int_inx: - oril #inx2a_mask,USER_FPSR(%a6) - bras int_wrt -int_operr: - fmovemx %fp0-%fp0,FPTEMP(%a6) |FPTEMP must contain the extended -| ;precision source that needs to be -| ;converted to integer this is required -| ;if the operr exception is enabled. -| ;set operr/aiop (no inex2 on int ovfl) - - oril #opaop_mask,USER_FPSR(%a6) -| ;fall through to perform int_wrt -int_wrt: - movel EXC_EA(%a6),%a1 |load destination address - tstl %a1 |check to see if it is a dest register - beqs wrt_dn |write data register - lea L_SCR1(%a6),%a0 |point to supervisor source address - bsrl mem_write - bra mvouti_end - -wrt_dn: - movel %d0,-(%sp) |d0 currently contains the size to write - bsrl get_fline |get_fline returns Dn in d0 - andiw #0x7,%d0 |isolate register - movel (%sp)+,%d1 |get size - cmpil #4,%d1 |most frequent case - beqs sz_long - cmpil #2,%d1 - bnes sz_con - orl #8,%d0 |add 'word' size to register# - bras sz_con -sz_long: - orl #0x10,%d0 |add 'long' size to register# -sz_con: - movel %d0,%d1 |reg_dest expects size:reg in d1 - bsrl reg_dest |load proper data register - bra mvouti_end -xp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - btstb #7,STAG(%a6) |check for extended denorm - bne xdnrm - clrl %d0 - bras do_fp |do normal case -sgp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - btstb #7,STAG(%a6) |check for extended denorm - bne sp_catas |branch if so - movew LOCAL_EX(%a0),%d0 - lea sp_bnds,%a1 - cmpw (%a1),%d0 - blt sp_under - cmpw 2(%a1),%d0 - bgt sp_over - movel #1,%d0 |set destination format to single - bras do_fp |do normal case -dp: - lea ETEMP(%a6),%a0 - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - - btstb #7,STAG(%a6) |check for extended denorm - bne dp_catas |branch if so - - movew LOCAL_EX(%a0),%d0 - lea dp_bnds,%a1 - - cmpw (%a1),%d0 - blt dp_under - cmpw 2(%a1),%d0 - bgt dp_over - - movel #2,%d0 |set destination format to double -| ;fall through to do_fp -| -do_fp: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |rnd mode in d1 - swap %d0 |rnd prec in upper word - addl %d0,%d1 |d1 has PREC/MODE info - - clrl %d0 |clear 
g,r,s - - bsrl round |round - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - - bfextu CMDREG1B(%a6){#3:#3},%d1 |extract destination format -| ;at this point only the dest -| ;formats sgl, dbl, ext are -| ;possible - cmpb #2,%d1 - bgts ddbl |double=5, extended=2, single=1 - bnes dsgl -| ;fall through to dext -dext: - bsrl dest_ext - bra mvout_end -dsgl: - bsrl dest_sgl - bra mvout_end -ddbl: - bsrl dest_dbl - bra mvout_end - -| -| Handle possible denorm or catastrophic underflow cases here -| -xdnrm: - bsr set_xop |initialize WBTEMP - bsetb #wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15 - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - bsrl dest_ext |store to memory - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end - -sp_under: - bsetb #etemp15_bit,STAG(%a6) - - cmpw 4(%a1),%d0 - blts sp_catas |catastrophic underflow case - - movel #1,%d0 |load in round precision - movel #sgl_thresh,%d1 |load in single denorm threshold - bsrl dpspdnrm |expects d1 to have the proper -| ;denorm threshold - bsrl dest_sgl |stores value to destination - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end |exit - -dp_under: - bsetb #etemp15_bit,STAG(%a6) - - cmpw 4(%a1),%d0 - blts dp_catas |catastrophic underflow case - - movel #dbl_thresh,%d1 |load in double precision threshold - movel #2,%d0 - bsrl dpspdnrm |expects d1 to have proper -| ;denorm threshold -| ;expects d0 to have round precision - bsrl dest_dbl |store value to destination - bsetb #unfl_bit,FPSR_EXCEPT(%a6) - bra mvout_end |exit - -| -| Handle catastrophic underflow cases here -| -sp_catas: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #1,%d0 |set round precision to sgl - - bsrl unf_sub |a0 points to result - - movel (%a7)+,USER_FPSR(%a6) - - movel #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference between -| ;denorm/norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - - bsrl dest_sgl |store the result - oril #unfinx_mask,USER_FPSR(%a6) - bra mvout_end - -dp_catas: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #2,%d0 |set round precision to dbl - bsrl unf_sub |a0 points to result - - movel (%a7)+,USER_FPSR(%a6) - - movel #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference between -| ;denorm/norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - - bsrl dest_dbl |store the result - oril #unfinx_mask,USER_FPSR(%a6) - bra mvout_end - -| -| Handle catastrophic overflow cases here -| -sp_over: -| Temp fix for z bit set in unf_sub - movel USER_FPSR(%a6),-(%a7) - - movel #1,%d0 - leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result - movel ETEMP_EX(%a6),(%a0) - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bsrl ovf_res - - movel (%a7)+,USER_FPSR(%a6) - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - bsrl dest_sgl - orl #ovfinx_mask,USER_FPSR(%a6) - bra mvout_end - -dp_over: -| Temp fix for z bit set in ovf_res - movel USER_FPSR(%a6),-(%a7) - - movel #2,%d0 - leal FP_SCR1(%a6),%a0 |use FP_SCR1 for creating result - movel ETEMP_EX(%a6),(%a0) - movel ETEMP_HI(%a6),4(%a0) - movel ETEMP_LO(%a6),8(%a0) - bsrl ovf_res - - movel (%a7)+,USER_FPSR(%a6) - - movel %a0,%a1 - movel EXC_EA(%a6),%a0 - bsrl dest_dbl - orl #ovfinx_mask,USER_FPSR(%a6) - bra mvout_end - -| -| DPSPDNRM -| -| This subroutine takes an extended normalized number and denormalizes -| it to the given round precision. 
This subroutine also decrements -| the input operand's exponent by 1 to account for the fact that -| dest_sgl or dest_dbl expects a normalized number's bias. -| -| Input: a0 points to a normalized number in internal extended format -| d0 is the round precision (=1 for sgl; =2 for dbl) -| d1 is the single precision or double precision -| denorm threshold -| -| Output: (In the format for dest_sgl or dest_dbl) -| a0 points to the destination -| a1 points to the operand -| -| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits -| -dpspdnrm: - movel %d0,-(%a7) |save round precision - clrl %d0 |clear initial g,r,s - bsrl dnrm_lp |careful with d0, it's needed by round - - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode - swap %d1 - movew 2(%a7),%d1 |set rounding precision - swap %d1 |at this point d1 has PREC/MODE info - bsrl round |round result, sets the inex bit in -| ;USER_FPSR if needed - - movew #1,%d0 - subw %d0,LOCAL_EX(%a0) |account for difference in denorm -| ;vs norm bias - - movel %a0,%a1 |a1 has the operand input - movel EXC_EA(%a6),%a0 |a0 has the destination pointer - addw #4,%a7 |pop stack - rts -| -| SET_XOP initialized WBTEMP with the value pointed to by a0 -| input: a0 points to input operand in the internal extended format -| -set_xop: - movel LOCAL_EX(%a0),WBTEMP_EX(%a6) - movel LOCAL_HI(%a0),WBTEMP_HI(%a6) - movel LOCAL_LO(%a0),WBTEMP_LO(%a6) - bfclr WBTEMP_SGN(%a6){#0:#8} - beqs sxop - bsetb #sign_bit,WBTEMP_EX(%a6) -sxop: - bfclr STAG(%a6){#5:#4} |clear wbtm66,wbtm1,wbtm0,sbit - rts -| -| P_MOVE -| -p_movet: - .long p_move - .long p_movez - .long p_movei - .long p_moven - .long p_move -p_regd: - .long p_dyd0 - .long p_dyd1 - .long p_dyd2 - .long p_dyd3 - .long p_dyd4 - .long p_dyd5 - .long p_dyd6 - .long p_dyd7 - -pack_out: - leal p_movet,%a0 |load jmp table address - movew STAG(%a6),%d0 |get source tag - bfextu %d0{#16:#3},%d0 |isolate source bits - movel (%a0,%d0.w*4),%a0 |load a0 with routine label for tag - jmp (%a0) |go to the routine - -p_write: - movel #0x0c,%d0 |get byte count - movel EXC_EA(%a6),%a1 |get the destination address - bsr mem_write |write the user's destination - moveb #0,CU_SAVEPC(%a6) |set the cu save pc to all 0's - -| -| Also note that the dtag must be set to norm here - this is because -| the 040 uses the dtag to execute the correct microcode. -| - bfclr DTAG(%a6){#0:#3} |set dtag to norm - - rts - -| Notes on handling of special case (zero, inf, and nan) inputs: -| 1. Operr is not signalled if the k-factor is greater than 18. -| 2. Per the manual, status bits are not set. 
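pack_out above is a plain tag dispatch: the source tag indexes p_movet and control jumps to the handler. The same structure in C, as a sketch; the enum encoding and the stub bodies are assumptions summarizing the routines that follow:

    /* Source-tag dispatch modelled on p_movet.  The enum mirrors the
     * STAG encoding; the handlers stand in for the assembly routines
     * of the same names below. */
    enum stag { STAG_NORM, STAG_ZERO, STAG_INF, STAG_NAN, STAG_DENORM };

    struct frame; /* exception frame, opaque here */

    static void p_move(struct frame *f)  { (void)f; /* norm/denorm: bindec */ }
    static void p_movez(struct frame *f) { (void)f; /* zero: clear exp/mant */ }
    static void p_movei(struct frame *f) { (void)f; /* inf: clear aiop      */ }
    static void p_moven(struct frame *f) { (void)f; /* nan: pass through    */ }

    static void (*const p_movet[5])(struct frame *) = {
        p_move, p_movez, p_movei, p_moven, p_move,
    };

    static void pack_out(enum stag tag, struct frame *f)
    {
        p_movet[tag](f); /* the indexed 'jmp (%a0)' above */
    }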
-| - -p_move: - movew CMDREG1B(%a6),%d0 - btstl #kfact_bit,%d0 |test for dynamic k-factor - beqs statick |if clear, k-factor is static -dynamick: - bfextu %d0{#25:#3},%d0 |isolate register for dynamic k-factor - lea p_regd,%a0 - movel %a0@(%d0:l:4),%a0 - jmp (%a0) -statick: - andiw #0x007f,%d0 |get k-factor - bfexts %d0{#25:#7},%d0 |sign extend d0 for bindec - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - bsrl bindec |perform the convert; data at a6 - leal FP_SCR1(%a6),%a0 |load a0 with result address - bral p_write -p_movez: - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - clrl 4(%a0) |load second lword of ZERO - clrl 8(%a0) |load third lword of ZERO - bra p_write |go write results -p_movei: - fmovel #0,%FPSR |clear aiop - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - bra p_write |go write the result -p_moven: - leal ETEMP(%a6),%a0 |a0 will point to the packed decimal - clrw 2(%a0) |clear lower word of exp - bra p_write |go write the result - -| -| Routines to read the dynamic k-factor from Dn. -| -p_dyd0: - movel USER_D0(%a6),%d0 - bras statick -p_dyd1: - movel USER_D1(%a6),%d0 - bras statick -p_dyd2: - movel %d2,%d0 - bras statick -p_dyd3: - movel %d3,%d0 - bras statick -p_dyd4: - movel %d4,%d0 - bras statick -p_dyd5: - movel %d5,%d0 - bras statick -p_dyd6: - movel %d6,%d0 - bra statick -p_dyd7: - movel %d7,%d0 - bra statick - - |end diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S deleted file mode 100644 index f84ae0dd435864d660e9054821cc199fa3135602..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/round.S +++ /dev/null @@ -1,648 +0,0 @@ -| -| round.sa 3.4 7/29/91 -| -| handle rounding and normalization tasks -| -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|ROUND idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -| -| round --- round result according to precision/mode -| -| a0 points to the input operand in the internal extended format -| d1(high word) contains rounding precision: -| ext = $0000xxxx -| sgl = $0001xxxx -| dbl = $0002xxxx -| d1(low word) contains rounding mode: -| RN = $xxxx0000 -| RZ = $xxxx0001 -| RM = $xxxx0010 -| RP = $xxxx0011 -| d0{31:29} contains the g,r,s bits (extended) -| -| On return the value pointed to by a0 is correctly rounded, -| a0 is preserved and the g-r-s bits in d0 are cleared. -| The result is not typed - the tag field is invalid. The -| result is still in the internal extended format. -| -| The INEX bit of USER_FPSR will be set if the rounded result was -| inexact (i.e. if any of the g-r-s bits were set). -| - - .global round -round: -| If g=r=s=0 then result is exact and round is done, else set -| the inex flag in status reg and continue. -| - bsrs ext_grs |this subroutine looks at the -| :rounding precision and sets -| ;the appropriate g-r-s bits. - tstl %d0 |if grs are zero, go force - bne rnd_cont |lower bits to zero for size - - swap %d1 |set up d1.w for round prec. - bra truncate - -rnd_cont: -| -| Use rounding mode as an index into a jump table for these modes. -| - orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex - lea mode_tab,%a1 - movel (%a1,%d1.w*4),%a1 - jmp (%a1) -| -| Jump table indexed by rounding mode in d1.w. All following assumes -| grs != 0. 
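The whole mode table that follows reduces to one decision: given the guard, round, and sticky bits, the sign, and the low mantissa bit, does the mantissa's low bit get incremented? A compact C restatement, using the standard round-to-nearest-even formulation rather than the add-then-clear trick rnd_near uses; the enum encoding matches the FPCR mode values documented above:

    #include <stdbool.h>

    enum rmode { RN, RZ, RM, RP }; /* FPCR rounding-mode encoding */

    /* True if rounding bumps the mantissa's low bit. */
    static bool round_increments(enum rmode mode, bool negative,
                                 bool g, bool r, bool s, bool lsb)
    {
        if (!g && !r && !s)       /* exact result: no rounding, no inex */
            return false;
        switch (mode) {
        case RN:                  /* nearest, ties go to even */
            return g && (r || s || lsb);
        case RZ:                  /* always truncate */
            return false;
        case RM:                  /* toward -inf: bump only if negative */
            return negative;
        case RP:                  /* toward +inf: bump only if positive */
            return !negative;
        }
        return false;
    }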
-|
-mode_tab:
- .long rnd_near
- .long rnd_zero
- .long rnd_mnus
- .long rnd_plus
-|
-| ROUND PLUS INFINITY
-|
-| If sign of fp number = 0 (positive), then add 1 to l.
-|
-rnd_plus:
- swap %d1 |set up d1 for round prec.
- tstb LOCAL_SGN(%a0) |check for sign
- bmi truncate |if negative then truncate
- movel #0xffffffff,%d0 |force g,r,s to be all f's
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-|
-| ROUND MINUS INFINITY
-|
-| If sign of fp number = 1 (negative), then add 1 to l.
-|
-rnd_mnus:
- swap %d1 |set up d1 for round prec.
- tstb LOCAL_SGN(%a0) |check for sign
- bpl truncate |if positive then truncate
- movel #0xffffffff,%d0 |force g,r,s to be all f's
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-|
-| ROUND ZERO
-|
-| Always truncate.
-rnd_zero:
- swap %d1 |set up d1 for round prec.
- bra truncate
-|
-|
-| ROUND NEAREST
-|
-| If (g=1), then add 1 to l and if (r=s=0), then clear l
-| Note that this will round to even in case of a tie.
-|
-rnd_near:
- swap %d1 |set up d1 for round prec.
- asll #1,%d0 |shift g-bit to c-bit
- bcc truncate |if (g=0) then truncate
- lea add_to_l,%a1
- movel (%a1,%d1.w*4),%a1
- jmp (%a1)
-
-|
-| ext_grs --- extract guard, round and sticky bits
-|
-| Input: d1 = PREC:ROUND
-| Output: d0{31:29}= guard, round, sticky
-|
-| The ext_grs extracts the guard/round/sticky bits according to the
-| selected rounding precision. It is called by the round subroutine
-| only. All registers except d0 are kept intact. d0 becomes an
-| updated guard,round,sticky in d0{31:29}
-|
-| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
-| prior to usage, and needs to restore d1 to original.
-|
-ext_grs:
- swap %d1 |have d1.w point to round precision
- cmpiw #0,%d1
- bnes sgl_or_dbl
- bras end_ext_grs
-
-sgl_or_dbl:
- moveml %d2/%d3,-(%a7) |make some temp registers
- cmpiw #1,%d1
- bnes grs_dbl
-grs_sgl:
- bfextu LOCAL_HI(%a0){#24:#2},%d3 |sgl prec. g-r are 2 bits right
- movel #30,%d2 |of the sgl prec. limits
- lsll %d2,%d3 |shift g-r bits to MSB of d3
- movel LOCAL_HI(%a0),%d2 |get word 2 for s-bit test
- andil #0x0000003f,%d2 |s bit is the or of all other
- bnes st_stky |bits to the right of g-r
- tstl LOCAL_LO(%a0) |test lower mantissa
- bnes st_stky |if any are set, set sticky
- tstl %d0 |test original g,r,s
- bnes st_stky |if any are set, set sticky
- bras end_sd |if words 3 and 4 are clr, exit
-grs_dbl:
- bfextu LOCAL_LO(%a0){#21:#2},%d3 |dbl-prec. g-r are 2 bits right
- movel #30,%d2 |of the dbl prec.
limits - lsll %d2,%d3 |shift g-r bits to the MSB of d3 - movel LOCAL_LO(%a0),%d2 |get lower mantissa for s-bit test - andil #0x000001ff,%d2 |s bit is the or-ing of all - bnes st_stky |other bits to the right of g-r - tstl %d0 |test word original g,r,s - bnes st_stky |if any are set, set sticky - bras end_sd |if clear, exit -st_stky: - bset #rnd_stky_bit,%d3 -end_sd: - movel %d3,%d0 |return grs to d0 - moveml (%a7)+,%d2/%d3 |restore scratch registers -end_ext_grs: - swap %d1 |restore d1 to original - rts - -|******************* Local Equates - .set ad_1_sgl,0x00000100 | constant to add 1 to l-bit in sgl prec - .set ad_1_dbl,0x00000800 | constant to add 1 to l-bit in dbl prec - - -|Jump table for adding 1 to the l-bit indexed by rnd prec - -add_to_l: - .long add_ext - .long add_sgl - .long add_dbl - .long add_dbl -| -| ADD SINGLE -| -add_sgl: - addl #ad_1_sgl,LOCAL_HI(%a0) - bccs scc_clr |no mantissa overflow - roxrw LOCAL_HI(%a0) |shift v-bit back in - roxrw LOCAL_HI+2(%a0) |shift v-bit back in - addw #0x1,LOCAL_EX(%a0) |and incr exponent -scc_clr: - tstl %d0 |test for rs = 0 - bnes sgl_done - andiw #0xfe00,LOCAL_HI+2(%a0) |clear the l-bit -sgl_done: - andil #0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit - clrl LOCAL_LO(%a0) |clear d2 - rts - -| -| ADD EXTENDED -| -add_ext: - addql #1,LOCAL_LO(%a0) |add 1 to l-bit - bccs xcc_clr |test for carry out - addql #1,LOCAL_HI(%a0) |propagate carry - bccs xcc_clr - roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_LO(%a0) - roxrw LOCAL_LO+2(%a0) - addw #0x1,LOCAL_EX(%a0) |and inc exp -xcc_clr: - tstl %d0 |test rs = 0 - bnes add_ext_done - andib #0xfe,LOCAL_LO+3(%a0) |clear the l bit -add_ext_done: - rts -| -| ADD DOUBLE -| -add_dbl: - addl #ad_1_dbl,LOCAL_LO(%a0) - bccs dcc_clr - addql #1,LOCAL_HI(%a0) |propagate carry - bccs dcc_clr - roxrw LOCAL_HI(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_HI+2(%a0) |mant is 0 so restore v-bit - roxrw LOCAL_LO(%a0) - roxrw LOCAL_LO+2(%a0) - addw #0x1,LOCAL_EX(%a0) |incr exponent -dcc_clr: - tstl %d0 |test for rs = 0 - bnes dbl_done - andiw #0xf000,LOCAL_LO+2(%a0) |clear the l-bit - -dbl_done: - andil #0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit - rts - -error: - rts -| -| Truncate all other bits -| -trunct: - .long end_rnd - .long sgl_done - .long dbl_done - .long dbl_done - -truncate: - lea trunct,%a1 - movel (%a1,%d1.w*4),%a1 - jmp (%a1) - -end_rnd: - rts - -| -| NORMALIZE -| -| These routines (nrm_zero & nrm_set) normalize the unnorm. This -| is done by shifting the mantissa left while decrementing the -| exponent. -| -| NRM_SET shifts and decrements until there is a 1 set in the integer -| bit of the mantissa (msb in d1). -| -| NRM_ZERO shifts and decrements until there is a 1 set in the integer -| bit of the mantissa (msb in d1) unless this would mean the exponent -| would go less than 0. In that case the number becomes a denorm - the -| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not -| normalized. -| -| Note that both routines have been optimized (for the worst case) and -| therefore do not have the easy to follow decrement/shift loop. 
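A C model of the normalization contract just described, assuming a 64-bit mantissa with the integer bit at bit 63 and a non-negative exponent; the compiler's count-leading-zeros builtin plays the role of bfffo:

    #include <stdint.h>

    /* Normalize by shifting until the integer bit is set, decrementing
     * the exponent as we go (nrm_set's job, done with bfffo instead of
     * a shift loop).  If that would drive the exponent below zero,
     * shift only as far as allowed and leave a denorm with exp = 0,
     * which is the extra rule nrm_zero adds. */
    static void normalize(int16_t *exp, uint64_t *mant)
    {
        if (*mant == 0) {             /* no bits set: force exp to 0 */
            *exp = 0;
            return;
        }
        int dist = __builtin_clzll(*mant); /* distance to the first 1 */
        if (dist >= *exp) {           /* X >= Y: becomes a denorm */
            *mant <<= *exp;
            *exp = 0;
        } else {                      /* X < Y: full normalization */
            *mant <<= dist;
            *exp -= dist;
        }
    }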
-|
-| NRM_ZERO
-|
-| Distance to first 1 bit in mantissa = X
-| Distance to 0 from exponent = Y
-| If X < Y
-| Then
-| nrm_set
-| Else
-| shift mantissa by Y
-| set exponent = 0
-|
-|input:
-| FP_SCR1 = exponent, ms mantissa part, ls mantissa part
-|output:
-| L_SCR1{4} = fpte15 or ete15 bit
-|
- .global nrm_zero
-nrm_zero:
- movew LOCAL_EX(%a0),%d0
- cmpw #64,%d0 |see if exp > 64
- bmis d0_less
- bsr nrm_set |exp > 64 so exp won't exceed 0
- rts
-d0_less:
- moveml %d2/%d3/%d5/%d6,-(%a7)
- movel LOCAL_HI(%a0),%d1
- movel LOCAL_LO(%a0),%d2
-
- bfffo %d1{#0:#32},%d3 |get the distance to the first 1
-| ;in ms mant
- beqs ms_clr |branch if no bits were set
- cmpw %d3,%d0 |if X>Y
- bmis greater |then exp will go past 0 (neg) if
-| ;it is just shifted
- bsr nrm_set |else exp won't go past 0
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-greater:
- movel %d2,%d6 |save ls mant in d6
- lsll %d0,%d2 |shift ls mant by count
- lsll %d0,%d1 |shift ms mant by count
- movel #32,%d5
- subl %d0,%d5 |make op a denorm by shifting bits
- lsrl %d5,%d6 |by the number in the exp, then
-| ;set exp = 0.
- orl %d6,%d1 |shift the ls mant bits into the ms mant
- movel #0,%d0 |same as if decremented exp to 0
-| ;while shifting
- movew %d0,LOCAL_EX(%a0)
- movel %d1,LOCAL_HI(%a0)
- movel %d2,LOCAL_LO(%a0)
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-ms_clr:
- bfffo %d2{#0:#32},%d3 |check if any bits set in ls mant
- beqs all_clr |branch if none set
- addw #32,%d3
- cmpw %d3,%d0 |if X>Y
- bmis greater |then branch
- bsr nrm_set |else exp won't go past 0
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-all_clr:
- movew #0,LOCAL_EX(%a0) |no mantissa bits set. Set exp = 0.
- moveml (%a7)+,%d2/%d3/%d5/%d6
- rts
-|
-| NRM_SET
-|
- .global nrm_set
-nrm_set:
- movel %d7,-(%a7)
- bfffo LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7
- beqs lower |branch if ms mant is all 0's
-
- movel %d6,-(%a7)
-
- subw %d7,LOCAL_EX(%a0) |sub exponent by count
- movel LOCAL_HI(%a0),%d0 |d0 has ms mant
- movel LOCAL_LO(%a0),%d1 |d1 has ls mant
-
- lsll %d7,%d0 |shift first 1 to j bit position
- movel %d1,%d6 |copy ls mant into d6
- lsll %d7,%d6 |shift ls mant by count
- movel %d6,LOCAL_LO(%a0) |store ls mant into memory
- moveql #32,%d6
- subl %d7,%d6 |continue shift
- lsrl %d6,%d1 |shift off all bits but those that will
-| ;be shifted into ms mant
- orl %d1,%d0 |shift the ls mant bits into the ms mant
- movel %d0,LOCAL_HI(%a0) |store ms mant into memory
- moveml (%a7)+,%d7/%d6 |restore registers
- rts
-
-|
-| We get here if ms mant was = 0, and we assume ls mant has bits
-| set (otherwise this would have been tagged a zero not a denorm).
-|
-lower:
- movew LOCAL_EX(%a0),%d0 |d0 has exponent
- movel LOCAL_LO(%a0),%d1 |d1 has ls mant
- subw #32,%d0 |account for ms mant being all zeros
- bfffo %d1{#0:#32},%d7 |find first 1 in ls mant to d7
- subw %d7,%d0 |subtract shift count from exp
- lsll %d7,%d1 |shift first 1 to integer bit in ms mant
- movew %d0,LOCAL_EX(%a0) |store exp
- movel %d1,LOCAL_HI(%a0) |store ms mant
- clrl LOCAL_LO(%a0) |clear ls mant
- movel (%a7)+,%d7
- rts
-|
-| denorm --- denormalize an intermediate result
-|
-| Used by underflow.
-|
-| Input:
-| a0 points to the operand to be denormalized
-| (in the internal extended format)
-|
-| d0: rounding precision
-| Output:
-| a0 points to the denormalized result
-| (in the internal extended format)
-|
-| d0 is guard,round,sticky
-|
-| d0 comes into this routine with the rounding precision. It
-| is then loaded with the denormalized exponent threshold for the
-| rounding precision.
-|
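The contract just described, as a minimal C sketch: shift right until the exponent reaches the destination's denorm threshold, collecting every shifted-out bit as inexact. The threshold parameter stands in for sgl_thresh/dbl_thresh, and the real dnrm_lp keeps guard, round, and sticky as separate bits rather than one flag:

    #include <stdbool.h>
    #include <stdint.h>

    /* Denormalize: raise the exponent to 'threshold' while shifting
     * the mantissa right to compensate; any bit shifted off the low
     * end makes the result inexact. */
    static uint64_t denormalize(int32_t *exp, uint64_t mant,
                                int32_t threshold, bool *inexact)
    {
        while (*exp < threshold) {
            if (mant & 1)
                *inexact = true;
            mant >>= 1;
            (*exp)++;
        }
        return mant;
    }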
-
- .global denorm
-denorm:
- btstb #6,LOCAL_EX(%a0) |check for exponents between $7fff-$4000
- beqs no_sgn_ext
- bsetb #7,LOCAL_EX(%a0) |sign extend if it is so
-no_sgn_ext:
-
- cmpib #0,%d0 |if 0 then extended precision
- bnes not_ext |else branch
-
- clrl %d1 |load d1 with ext threshold
- clrl %d0 |clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check for inex
- beq no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-not_ext:
- cmpil #1,%d0 |if 1 then single precision
- beqs load_sgl |else must be 2, double prec
-
-load_dbl:
- movew #dbl_thresh,%d1 |put copy of threshold in d1
- movel %d1,%d0 |copy d1 into d0
- subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
- cmpw #67,%d0 |if diff > 67 (mant + grs bits)
- bpls chk_stky |then branch (all bits would be
-| ; shifted off in denorm routine)
- clrl %d0 |else clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check flag
- beqs no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-load_sgl:
- movew #sgl_thresh,%d1 |put copy of threshold in d1
- movel %d1,%d0 |copy d1 into d0
- subw LOCAL_EX(%a0),%d0 |diff = threshold - exp
- cmpw #67,%d0 |if diff > 67 (mant + grs bits)
- bpls chk_stky |then branch (all bits would be
-| ; shifted off in denorm routine)
- clrl %d0 |else clear the sticky flag
- bsr dnrm_lp |denormalize the number
- tstb %d1 |check flag
- beqs no_inex |if clr, no inex
- bras dnrm_inex |if set, set inex
-
-chk_stky:
- tstl LOCAL_HI(%a0) |check for any bits set
- bnes set_stky
- tstl LOCAL_LO(%a0) |check for any bits set
- bnes set_stky
- bras clr_mant
-set_stky:
- orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
- movel #0x20000000,%d0 |set sticky bit in return value
-clr_mant:
- movew %d1,LOCAL_EX(%a0) |load exp with threshold
- movel #0,LOCAL_HI(%a0) |set d1 = 0 (ms mantissa)
- movel #0,LOCAL_LO(%a0) |set d2 = 0 (ls mantissa)
- rts
-dnrm_inex:
- orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
-no_inex:
- rts
-
-|
-| dnrm_lp --- denormalize exponent/mantissa to specified threshold
-|
-| Input:
-| a0 points to the operand to be denormalized
-| d0{31:29} initial guard,round,sticky
-| d1{15:0} denormalization threshold
-| Output:
-| a0 points to the denormalized operand
-| d0{31:29} final guard,round,sticky
-| d1.b inexact flag: all ones means inexact result
-|
-| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
-| so that bfext can be used to extract the new low part of the mantissa.
-| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
-| is no LOCAL_GRS scratch word following it on the fsave frame.
-|
- .global dnrm_lp
-dnrm_lp:
- movel %d2,-(%sp) |save d2 for temp use
- btstb #E3,E_BYTE(%a6) |test for type E3 exception
- beqs not_E3 |not type E3 exception
- bfextu WBTEMP_GRS(%a6){#6:#3},%d2 |extract guard,round, sticky bit
- movel #29,%d0
- lsll %d0,%d2 |shift g,r,s to their positions
- movel %d2,%d0
-not_E3:
- movel (%sp)+,%d2 |restore d2
- movel LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
- movel %d0,FP_SCR2+LOCAL_GRS(%a6)
- movel %d1,%d0 |copy the denorm threshold
- subw LOCAL_EX(%a0),%d1 |d1 = threshold - uns exponent
- bles no_lp |d1 <= 0
- cmpw #32,%d1
- blts case_1 |0 < d1 < 32
- cmpw #64,%d1
- blts case_2 |32 <= d1 < 64
- bra case_3 |d1 >= 64
-|
-| No normalization necessary
-|
-no_lp:
- clrb %d1 |set no inex2 reported
- movel FP_SCR2+LOCAL_GRS(%a6),%d0 |restore original g,r,s
- rts
-|
-| case (d1 >= 64): Force the exponent to be the denorm threshold with the
-| correct sign.
-| -case_3: - movew %d0,LOCAL_EX(%a0) - tstw LOCAL_SGN(%a0) - bges c3con -c3neg: - orl #0x80000000,LOCAL_EX(%a0) -c3con: - cmpw #64,%d1 - beqs sixty_four - cmpw #65,%d1 - beqs sixty_five -| -| Shift value is out of range. Set d1 for inex2 flag and -| return a zero with the given threshold. -| - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - movel #0x20000000,%d0 - st %d1 - rts - -sixty_four: - movel LOCAL_HI(%a0),%d0 - bfextu %d0{#2:#30},%d1 - andil #0xc0000000,%d0 - bras c3com - -sixty_five: - movel LOCAL_HI(%a0),%d0 - bfextu %d0{#1:#31},%d1 - andil #0x80000000,%d0 - lsrl #1,%d0 |shift high bit into R bit - -c3com: - tstl %d1 - bnes c3ssticky - tstl LOCAL_LO(%a0) - bnes c3ssticky - tstb FP_SCR2+LOCAL_GRS(%a6) - bnes c3ssticky - clrb %d1 - bras c3end - -c3ssticky: - bsetl #rnd_stky_bit,%d0 - st %d1 -c3end: - clrl LOCAL_HI(%a0) - clrl LOCAL_LO(%a0) - rts - - |end diff --git a/arch/m68k/fpsp040/sacos.S b/arch/m68k/fpsp040/sacos.S deleted file mode 100644 index 513c7cca7318d004b5e96b395f2333ff4f575847..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sacos.S +++ /dev/null @@ -1,114 +0,0 @@ -| -| sacos.sa 3.3 12/19/90 -| -| Description: The entry point sAcos computes the inverse cosine of -| an input argument; sAcosd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arccos(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sCOS takes approximately 310 cycles. -| -| Algorithm: -| -| ACOS -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate acos(X) by -| z := (1-X) / (1+X) -| acos(X) = 2 * atan( sqrt(z) ). -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SACOS idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -PI: .long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 -PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - - |xref t_operr - |xref t_frcinx - |xref satan - - .global sacosd -sacosd: -|--ACOS(X) = PI/2 FOR DENORMALIZED X - fmovel %d1,%fpcr | ...load user's rounding mode/precision - fmovex PIBY2,%fp0 - bra t_frcinx - - .global sacos -sacos: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 | ...pack exponent with upper 16 fraction - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges ACOSBIG - -|--THIS IS THE USUAL CASE, |X| < 1 -|--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) - - fmoves #0x3F800000,%fp1 - faddx %fp0,%fp1 | ...1+X - fnegx %fp0 | ... 
-X - fadds #0x3F800000,%fp0 | ...1-X - fdivx %fp1,%fp0 | ...(1-X)/(1+X) - fsqrtx %fp0 | ...SQRT((1-X)/(1+X)) - fmovemx %fp0-%fp0,(%a0) | ...overwrite input - movel %d1,-(%sp) |save original users fpcr - clrl %d1 - bsr satan | ...ATAN(SQRT([1-X]/[1+X])) - fmovel (%sp)+,%fpcr |restore users exceptions - faddx %fp0,%fp0 | ...2 * ATAN( STUFF ) - bra t_frcinx - -ACOSBIG: - fabsx %fp0 - fcmps #0x3F800000,%fp0 - fbgt t_operr |cause an operr exception - -|--|X| = 1, ACOS(X) = 0 OR PI - movel (%a0),%d0 | ...pack exponent with upper 16 fraction - movew 4(%a0),%d0 - cmpl #0,%d0 |D0 has original exponent+fraction - bgts ACOSP1 - -|--X = -1 -|Returns PI and inexact exception - fmovex PI,%fp0 - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 |cause an inexact exception to be put -| ;into the 040 - will not trap until next -| ;fp inst. - bra t_frcinx - -ACOSP1: - fmovel %d1,%FPCR - fmoves #0x00000000,%fp0 - rts |Facos ; of +1 is exact - - |end diff --git a/arch/m68k/fpsp040/sasin.S b/arch/m68k/fpsp040/sasin.S deleted file mode 100644 index 2a269a58ceaa8ccde03ab59e77c740d34f44c128..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sasin.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| sasin.sa 3.3 12/19/90 -| -| Description: The entry point sAsin computes the inverse sine of -| an input argument; sAsind does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arcsin(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sASIN takes approximately 310 cycles. -| -| Algorithm: -| -| ASIN -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate asin(X) by -| z := sqrt( [1-X][1+X] ) -| asin(X) = atan( x / z ). -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SASIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -PIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - - |xref t_operr - |xref t_frcinx - |xref t_extdnrm - |xref satan - - .global sasind -sasind: -|--ASIN(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global sasin -sasin: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges asinbig - -|--THIS IS THE USUAL CASE, |X| < 1 -|--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) - - fmoves #0x3F800000,%fp1 - fsubx %fp0,%fp1 | ...1-X - fmovemx %fp2-%fp2,-(%a7) - fmoves #0x3F800000,%fp2 - faddx %fp0,%fp2 | ...1+X - fmulx %fp2,%fp1 | ...(1+X)(1-X) - fmovemx (%a7)+,%fp2-%fp2 - fsqrtx %fp1 | ...SQRT([1-X][1+X]) - fdivx %fp1,%fp0 | ...X/SQRT([1-X][1+X]) - fmovemx %fp0-%fp0,(%a0) - bsr satan - bra t_frcinx - -asinbig: - fabsx %fp0 | ...|X| - fcmps #0x3F800000,%fp0 - fbgt t_operr |cause an operr exception - -|--|X| = 1, ASIN(X) = +- PI/2. 
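The usual-case path of sasin above, restated against libm for clarity; this is the same reduction the assembly performs before tail-calling satan:

    #include <math.h>

    /* For |x| < 1: asin(x) = atan( x / sqrt((1-x)*(1+x)) ).
     * (1-x)*(1+x) is used instead of 1 - x*x because it loses less
     * accuracy when |x| is close to 1. */
    static double asin_via_atan(double x)
    {
        return atan(x / sqrt((1.0 - x) * (1.0 + x)));
    }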
- - fmovex PIBY2,%fp0 - movel (%a0),%d0 - andil #0x80000000,%d0 | ...SIGN BIT OF X - oril #0x3F800000,%d0 | ...+-1 IN SGL FORMAT - movel %d0,-(%sp) | ...push SIGN(X) IN SGL-FMT - fmovel %d1,%FPCR - fmuls (%sp)+,%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S deleted file mode 100644 index c8a664998f92d659c2a44937657a1b461fa02600..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/satan.S +++ /dev/null @@ -1,477 +0,0 @@ -| -| satan.sa 3.3 12/19/90 -| -| The entry point satan computes the arctangent of an -| input value. satand does the same except the input value is a -| denormalized number. -| -| Input: Double-extended value in memory location pointed to by address -| register a0. -| -| Output: Arctan(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program satan takes approximately 160 cycles for input -| argument X such that 1/16 < |X| < 16. For the other arguments, -| the program will run no worse than 10% slower. -| -| Algorithm: -| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. -| -| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. -| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits -| of X with a bit-1 attached at the 6-th bit position. Define u -| to be u = (X-F) / (1 + X*F). -| -| Step 3. Approximate arctan(u) by a polynomial poly. -| -| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values -| calculated beforehand. Exit. -| -| Step 5. If |X| >= 16, go to Step 7. -| -| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. -| -| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. -| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
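The table technique of Steps 2 through 4 above, transposed to C doubles as a sketch: F copies the sign, exponent, and first five fraction bits of x, with a sixth fraction bit forced to 1. libm's atan stands in for the 128-entry ATANTBL lookup, and Taylor terms stand in for the fitted A1..A3 coefficients, so this shows the structure, not the accuracy:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* atan(x) = atan(F) + atan(u), u = (x-F)/(1+x*F); u is small
     * because F is close to x, so a short polynomial suffices. */
    static double atan_by_table(double x) /* assumes 1/16 <= |x| < 16 */
    {
        uint64_t bits;
        double F, u, v;

        memcpy(&bits, &x, sizeof bits);
        bits &= 0xFFFF800000000000ULL; /* sign, exponent, top 5 fraction bits */
        bits |= 0x0000400000000000ULL; /* force the 6th fraction bit to 1 */
        memcpy(&F, &bits, sizeof F);

        u = (x - F) / (1.0 + x * F);
        v = u * u;
        return atan(F) + u - u * v / 3.0 + u * v * v / 5.0;
    }

In the assembly, the divide producing u runs in parallel with the table indexing, which is the point of the method: the lookup and register shuffling hide under the divide latency.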
- -|satan idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FFB8000,0x4002FFFF - -ONE: .long 0x3F800000 - - .long 0x00000000 - -ATANA3: .long 0xBFF6687E,0x314987D8 -ATANA2: .long 0x4002AC69,0x34A26DB3 - -ATANA1: .long 0xBFC2476F,0x4E1DA28E -ATANB6: .long 0x3FB34444,0x7F876989 - -ATANB5: .long 0xBFB744EE,0x7FAF45DB -ATANB4: .long 0x3FBC71C6,0x46940220 - -ATANB3: .long 0xBFC24924,0x921872F9 -ATANB2: .long 0x3FC99999,0x99998FA9 - -ATANB1: .long 0xBFD55555,0x55555555 -ATANC5: .long 0xBFB70BF3,0x98539E6A - -ATANC4: .long 0x3FBC7187,0x962D1D7D -ATANC3: .long 0xBFC24924,0x827107B8 - -ATANC2: .long 0x3FC99999,0x9996263E -ATANC1: .long 0xBFD55555,0x55555536 - -PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 -NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 -PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 -NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 - -ATANTBL: - .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 - .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 - .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 - .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 - .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 - .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 - .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 - .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 - .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 - .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 - .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 - .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 - .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 - .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 - .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 - .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 - .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 - .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 - .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 - .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 - .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 - .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 - .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 - .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 - .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 - .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 - .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 - .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 - .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 - .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 - .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 - .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 - .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 - .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 - .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 - .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 - .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 - .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 - .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 - .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 - .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 - .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 - .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 - .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 - .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 - .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 - .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 - .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 - .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 - .long 
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 - .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 - .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 - .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 - .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 - .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 - .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 - .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 - .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 - .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 - .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 - .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 - .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 - .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 - .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 - .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 - .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 - .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 - .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 - .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 - .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 - .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 - .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 - .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 - .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 - .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 - .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 - .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 - .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 - .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 - .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 - .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 - .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 - .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 - .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 - .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 - .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 - .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 - .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 - .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 - .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 - .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 - .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 - .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 - .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 - .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 - .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 - .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 - .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 - .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 - .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 - .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 - .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 - .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 - .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 - .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 - .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 - .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 - .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 - .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 - .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 - .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 - .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 - .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 - .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 - .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 - .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 - .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 - .long 
0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 - .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 - .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 - .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 - .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 - .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 - .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 - .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 - .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 - .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 - .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - .set XFRACLO,X+8 - - .set ATANF,FP_SCR2 - .set ATANFHI,ATANF+4 - .set ATANFLO,ATANF+8 - - - | xref t_frcinx - |xref t_extdnrm - - .global satand -satand: -|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT - - bra t_extdnrm - - .global satan -satan: -|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? - bges ATANOK1 - bra ATANSM - -ATANOK1: - cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? - bles ATANMAIN - bra ATANBIG - - -|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE -|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). -|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN -|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE -|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS -|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR -|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO -|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE -|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL -|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE -|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION -|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION -|--WILL INVOLVE A VERY LONG POLYNOMIAL. - -|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS -|--WE CHOSE F TO BE +-2^K * 1.BBBB1 -|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE -|--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE -|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS -|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). - -ATANMAIN: - - movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE - andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS - oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 - movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F - - fmovex %fp0,%fp1 | ...FP1 IS X - fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 - fsubx X(%a6),%fp0 | ...FP0 IS X-F - fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F - fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) - -|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) -|--CREATE ATAN(F) AND STORE IT IN ATANF, AND -|--SAVE REGISTERS FP2. - - movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY - movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X - andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION - andil #0x7FFF0000,%d2 | ...EXPONENT OF F - subil #0x3FFB0000,%d2 | ...K+4 - asrl #1,%d2 - addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F - asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|) - lea ATANTBL,%a1 - addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) - movel (%a1)+,ATANF(%a6) - movel (%a1)+,ATANFHI(%a6) - movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) - movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. 
AGAIN - andil #0x80000000,%d0 | ...SIGN(F) - orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) - movel (%a7)+,%d2 | ...RESTORE d2 - -|--THAT'S ALL I HAVE TO DO FOR NOW, -|--BUT ALAS, THE DIVIDE IS STILL CRANKING! - -|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS -|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U -|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. -|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) -|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. -|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT -|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED - - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 - fmoved ATANA3,%fp2 - faddx %fp1,%fp2 | ...A3+V - fmulx %fp1,%fp2 | ...V*(A3+V) - fmulx %fp0,%fp1 | ...U*V - faddd ATANA2,%fp2 | ...A2+V*(A3+V) - fmuld ATANA1,%fp1 | ...A1*U*V - fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) - - faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED - fmovel %d1,%FPCR |restore users exceptions - faddx ATANF(%a6),%fp0 | ...ATAN(X) - bra t_frcinx - -ATANBORS: -|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. -|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. - cmpil #0x3FFF8000,%d0 - bgt ATANBIG | ...I.E. |X| >= 16 - -ATANSM: -|--|X| <= 1/16 -|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE -|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) -|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) -|--WHERE Y = X*X, AND Z = Y*Y. - - cmpil #0x3FD78000,%d0 - blt ATANTINY -|--COMPUTE POLYNOMIAL - fmulx %fp0,%fp0 | ...FP0 IS Y = X*X - - - movew #0x0000,XDCARE(%a6) - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y - - fmoved ATANB6,%fp2 - fmoved ATANB5,%fp3 - - fmulx %fp1,%fp2 | ...Z*B6 - fmulx %fp1,%fp3 | ...Z*B5 - - faddd ATANB4,%fp2 | ...B4+Z*B6 - faddd ATANB3,%fp3 | ...B3+Z*B5 - - fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) - fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) - - faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) - faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) - - fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) - fmulx X(%a6),%fp0 | ...X*Y - - faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] - - - fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) - - fmovel %d1,%FPCR |restore users exceptions - faddx X(%a6),%fp0 - - bra t_frcinx - -ATANTINY: -|--|X| < 2^(-40), ATAN(X) = X - movew #0x0000,XDCARE(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - - bra t_frcinx - -ATANBIG: -|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, -|--RETURN SIGN(X)*PI/2 + ATAN(-1/X). - cmpil #0x40638000,%d0 - bgt ATANHUGE - -|--APPROXIMATE ATAN(-1/X) BY -|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' -|--THIS CAN BE RE-WRITTEN AS -|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. - - fmoves #0xBF800000,%fp1 | ...LOAD -1 - fdivx %fp0,%fp1 | ...FP1 IS -1/X - - -|--DIVIDE IS STILL CRANKING - - fmovex %fp1,%fp0 | ...FP0 IS X' - fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' - fmovex %fp1,X(%a6) | ...X IS REALLY X' - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y - - fmoved ATANC5,%fp3 - fmoved ATANC4,%fp2 - - fmulx %fp1,%fp3 | ...Z*C5 - fmulx %fp1,%fp2 | ...Z*B4 - - faddd ATANC3,%fp3 | ...C3+Z*C5 - faddd ATANC2,%fp2 | ...C2+Z*C4 - - fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED - fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) - - faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) - fmulx X(%a6),%fp0 | ...X'*Y - - faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] - - - fmulx %fp1,%fp0 | ...X'*Y*([B1+Z*(B3+Z*B5)] -| ... 
+[Y*(B2+Z*(B4+Z*B6))]) - faddx X(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - - btstb #7,(%a0) - beqs pos_big - -neg_big: - faddx NPIBY2,%fp0 - bra t_frcinx - -pos_big: - faddx PPIBY2,%fp0 - bra t_frcinx - -ATANHUGE: -|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY - btstb #7,(%a0) - beqs pos_huge - -neg_huge: - fmovex NPIBY2,%fp0 - fmovel %d1,%fpcr - fsubx NTINY,%fp0 - bra t_frcinx - -pos_huge: - fmovex PPIBY2,%fp0 - fmovel %d1,%fpcr - fsubx PTINY,%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/satanh.S b/arch/m68k/fpsp040/satanh.S deleted file mode 100644 index ba91f77a75716e92edc25f6911bd0d81f4c307b2..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/satanh.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| satanh.sa 3.3 12/19/90 -| -| The entry point satanh computes the inverse -| hyperbolic tangent of -| an input argument; satanhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value arctanh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program satanh takes approximately 270 cycles. -| -| Algorithm: -| -| ATANH -| 1. If |X| >= 1, go to 3. -| -| 2. (|X| < 1) Calculate atanh(X) by -| sgn := sign(X) -| y := |X| -| z := 2y/(1-y) -| atanh(X) := sgn * (1/2) * logp1(z) -| Exit. -| -| 3. If |X| > 1, go to 5. -| -| 4. (|X| = 1) Generate infinity with an appropriate sign and -| divide-by-zero by -| sgn := sign(X) -| atan(X) := sgn / (+0). -| Exit. -| -| 5. (|X| > 1) Generate an invalid operation by 0 * infinity. -| Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|satanh idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_dz - |xref t_operr - |xref t_frcinx - |xref t_extdnrm - |xref slognp1 - - .global satanhd -satanhd: -|--ATANH(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global satanh -satanh: - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x3FFF8000,%d0 - bges ATANHBIG - -|--THIS IS THE USUAL CASE, |X| < 1 -|--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). - - fabsx (%a0),%fp0 | ...Y = |X| - fmovex %fp0,%fp1 - fnegx %fp1 | ...-Y - faddx %fp0,%fp0 | ...2Y - fadds #0x3F800000,%fp1 | ...1-Y - fdivx %fp1,%fp0 | ...2Y/(1-Y) - movel (%a0),%d0 - andil #0x80000000,%d0 - oril #0x3F000000,%d0 | ...SIGN(X)*HALF - movel %d0,-(%sp) - - fmovemx %fp0-%fp0,(%a0) | ...overwrite input - movel %d1,-(%sp) - clrl %d1 - bsr slognp1 | ...LOG1P(Z) - fmovel (%sp)+,%fpcr - fmuls (%sp)+,%fp0 - bra t_frcinx - -ATANHBIG: - fabsx (%a0),%fp0 | ...|X| - fcmps #0x3F800000,%fp0 - fbgt t_operr - bra t_dz - - |end diff --git a/arch/m68k/fpsp040/scale.S b/arch/m68k/fpsp040/scale.S deleted file mode 100644 index 04829dd4f1f48172b978a7d582fcd8f57067cb7b..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/scale.S +++ /dev/null @@ -1,370 +0,0 @@ -| -| scale.sa 3.3 7/30/91 -| -| The entry point sSCALE computes the destination operand -| scaled by the source operand. If the absolute value of -| the source operand is (>= 2^14) an overflow or underflow -| is returned. 
-| -| The entry point sscale is called from do_func to emulate -| the fscale unimplemented instruction. -| -| Input: Double-extended destination operand in FPTEMP, -| double-extended source operand in ETEMP. -| -| Output: The function returns scale(X,Y) to fp0. -| -| Modifies: fp0. -| -| Algorithm: -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SCALE idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref t_ovfl2 - |xref t_unfl - |xref round - |xref t_resdnrm - -SRC_BNDS: .short 0x3fff,0x400c - -| -| This entry point is used by the unimplemented instruction exception -| handler. -| -| -| -| FSCALE -| - .global sscale -sscale: - fmovel #0,%fpcr |clr user enabled exc - clrl %d1 - movew FPTEMP(%a6),%d1 |get dest exponent - smi L_SCR1(%a6) |use L_SCR1 to hold sign - andil #0x7fff,%d1 |strip sign - movew ETEMP(%a6),%d0 |check src bounds - andiw #0x7fff,%d0 |clr sign bit - cmp2w SRC_BNDS,%d0 - bccs src_in - cmpiw #0x400c,%d0 |test for too large - bge src_out -| -| The source input is below 1, so we check for denormalized numbers -| and set unfl. -| -src_small: - moveb DTAG(%a6),%d0 - andib #0xe0,%d0 - tstb %d0 - beqs no_denorm - st STORE_FLG(%a6) |dest already contains result - orl #unfl_mask,USER_FPSR(%a6) |set UNFL -den_done: - leal FPTEMP(%a6),%a0 - bra t_resdnrm -no_denorm: - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |simply return dest - rts - - -| -| Source is within 2^14 range. To perform the int operation, -| move it to d0. -| -src_in: - fmovex ETEMP(%a6),%fp0 |move in src for int - fmovel #rz_mode,%fpcr |force rz for src conversion - fmovel %fp0,%d0 |int src to d0 - fmovel #0,%FPSR |clr status from above - tstw ETEMP(%a6) |check src sign - blt src_neg -| -| Source is positive. Add the src to the dest exponent. -| The result can be denormalized, if src = 0, or overflow, -| if the result of the add sets a bit in the upper word. -| -src_pos: - tstw %d1 |check for denorm - beq dst_dnrm - addl %d0,%d1 |add src to dest exp - beqs denorm |if zero, result is denorm - cmpil #0x7fff,%d1 |test for overflow - bges ovfl - tstb L_SCR1(%a6) - beqs spos_pos - orw #0x8000,%d1 -spos_pos: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts -ovfl: - tstb L_SCR1(%a6) - beqs sovl_pos - orw #0x8000,%d1 -sovl_pos: - movew FPTEMP(%a6),ETEMP(%a6) |result in ETEMP - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - bra t_ovfl2 - -denorm: - tstb L_SCR1(%a6) - beqs den_pos - orw #0x8000,%d1 -den_pos: - tstl FPTEMP_HI(%a6) |check j bit - blts nden_exit |if set, not denorm - movew %d1,ETEMP(%a6) |input expected in ETEMP - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - orl #unfl_bit,USER_FPSR(%a6) |set unfl - leal ETEMP(%a6),%a0 - bra t_resdnrm -nden_exit: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts - -| -| Source is negative. Add the src to the dest exponent. -| (The result exponent will be reduced). The result can be -| denormalized. 
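Every branch of sscale above comes down to one step: add the integer source to the destination's 15-bit biased exponent and classify the sum. A minimal C model of that step; the enum names are ours, not the FPSP's:

    #include <stdint.h>

    enum scale_class { SCALE_OK, SCALE_OVERFLOW, SCALE_DENORM };

    /* Core of scale(X, n): adjust the biased exponent by n, the way
     * src_pos/src_neg do, and report which exit path applies. */
    static enum scale_class scale_exp(int32_t *exp, int32_t n)
    {
        *exp += n;
        if (*exp >= 0x7fff)  /* exponent field saturated: overflow path */
            return SCALE_OVERFLOW;
        if (*exp <= 0)       /* at or below zero: needs denormalization */
            return SCALE_DENORM;
        return SCALE_OK;
    }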
-| -src_neg: - addl %d0,%d1 |add src to dest - beqs denorm |if zero, result is denorm - blts fix_dnrm |if negative, result is -| ;needing denormalization - tstb L_SCR1(%a6) - beqs sneg_pos - orw #0x8000,%d1 -sneg_pos: - movew %d1,FPTEMP(%a6) |result in FPTEMP - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |write result to fp0 - rts - - -| -| The result exponent is below denorm value. Test for catastrophic -| underflow and force zero if true. If not, try to shift the -| mantissa right until a zero exponent exists. -| -fix_dnrm: - cmpiw #0xffc0,%d1 |lower bound for normalization - blt fix_unfl |if lower, catastrophic unfl - movew %d1,%d0 |use d0 for exp - movel %d2,-(%a7) |free d2 for norm - movel FPTEMP_HI(%a6),%d1 - movel FPTEMP_LO(%a6),%d2 - clrl L_SCR2(%a6) -fix_loop: - addw #1,%d0 |drive d0 to 0 - lsrl #1,%d1 |while shifting the - roxrl #1,%d2 |mantissa to the right - bccs no_carry - st L_SCR2(%a6) |use L_SCR2 to capture inex -no_carry: - tstw %d0 |it is finished when - blts fix_loop |d0 is zero or the mantissa - tstb L_SCR2(%a6) - beqs tst_zero - orl #unfl_inx_mask,USER_FPSR(%a6) -| ;set unfl, aunfl, ainex -| -| Test for zero. If zero, simply use fmove to return +/- zero -| to the fpu. -| -tst_zero: - clrw FPTEMP_EX(%a6) - tstb L_SCR1(%a6) |test for sign - beqs tst_con - orw #0x8000,FPTEMP_EX(%a6) |set sign bit -tst_con: - movel %d1,FPTEMP_HI(%a6) - movel %d2,FPTEMP_LO(%a6) - movel (%a7)+,%d2 - tstl %d1 - bnes not_zero - tstl FPTEMP_LO(%a6) - bnes not_zero -| -| Result is zero. Check for rounding mode to set lsb. If the -| mode is rp, and the zero is positive, return smallest denorm. -| If the mode is rm, and the zero is negative, return smallest -| negative denorm. -| - btstb #5,FPCR_MODE(%a6) |test if rm or rp - beqs no_dir - btstb #4,FPCR_MODE(%a6) |check which one - beqs zer_rm -zer_rp: - tstb L_SCR1(%a6) |check sign - bnes no_dir |if set, neg op, no inc - movel #1,FPTEMP_LO(%a6) |set lsb - bras sm_dnrm -zer_rm: - tstb L_SCR1(%a6) |check sign - beqs no_dir |if clr, neg op, no inc - movel #1,FPTEMP_LO(%a6) |set lsb - orl #neg_mask,USER_FPSR(%a6) |set N - bras sm_dnrm -no_dir: - fmovel USER_FPCR(%a6),%FPCR - fmovex FPTEMP(%a6),%fp0 |use fmove to set cc's - rts - -| -| The rounding mode changed the zero to a smallest denorm. Call -| t_resdnrm with exceptional operand in ETEMP. -| -sm_dnrm: - movel FPTEMP_EX(%a6),ETEMP_EX(%a6) - movel FPTEMP_HI(%a6),ETEMP_HI(%a6) - movel FPTEMP_LO(%a6),ETEMP_LO(%a6) - leal ETEMP(%a6),%a0 - bra t_resdnrm - -| -| Result is still denormalized. -| -not_zero: - orl #unfl_mask,USER_FPSR(%a6) |set unfl - tstb L_SCR1(%a6) |check for sign - beqs fix_exit - orl #neg_mask,USER_FPSR(%a6) |set N -fix_exit: - bras sm_dnrm - - -| -| The result has underflowed to zero. Return zero and set -| unfl, aunfl, and ainex. -| -fix_unfl: - orl #unfl_inx_mask,USER_FPSR(%a6) - btstb #5,FPCR_MODE(%a6) |test if rm or rp - beqs no_dir2 - btstb #4,FPCR_MODE(%a6) |check which one - beqs zer_rm2 -zer_rp2: - tstb L_SCR1(%a6) |check sign - bnes no_dir2 |if set, neg op, no inc - clrl FPTEMP_EX(%a6) - clrl FPTEMP_HI(%a6) - movel #1,FPTEMP_LO(%a6) |set lsb - bras sm_dnrm |return smallest denorm -zer_rm2: - tstb L_SCR1(%a6) |check sign - beqs no_dir2 |if clr, neg op, no inc - movew #0x8000,FPTEMP_EX(%a6) - clrl FPTEMP_HI(%a6) - movel #1,FPTEMP_LO(%a6) |set lsb - orl #neg_mask,USER_FPSR(%a6) |set N - bra sm_dnrm |return smallest denorm - -no_dir2: - tstb L_SCR1(%a6) - bges pos_zero -neg_zero: - clrl FP_SCR1(%a6) |clear the exceptional operand - clrl FP_SCR1+4(%a6) |for gen_except. 
- clrl FP_SCR1+8(%a6)
- fmoves #0x80000000,%fp0
- rts
-pos_zero:
- clrl FP_SCR1(%a6) |clear the exceptional operand
- clrl FP_SCR1+4(%a6) |for gen_except.
- clrl FP_SCR1+8(%a6)
- fmoves #0x00000000,%fp0
- rts
-
-|
-| The destination is a denormalized number. It must be handled
-| by first shifting the bits in the mantissa until it is normalized,
-| then adding the remainder of the source to the exponent.
-|
-dst_dnrm:
- moveml %d2/%d3,-(%a7)
- movew FPTEMP_EX(%a6),%d1
- movel FPTEMP_HI(%a6),%d2
- movel FPTEMP_LO(%a6),%d3
-dst_loop:
- tstl %d2 |test for normalized result
- blts dst_norm |exit loop if so
- tstl %d0 |otherwise, test shift count
- beqs dst_fin |if zero, shifting is done
- subil #1,%d0 |dec src
- lsll #1,%d3
- roxll #1,%d2
- bras dst_loop
-|
-| Destination became normalized. Simply add the remaining
-| portion of the src to the exponent.
-|
-dst_norm:
- addw %d0,%d1 |dst is normalized; add src
- tstb L_SCR1(%a6)
- beqs dnrm_pos
- orl #0x8000,%d1
-dnrm_pos:
- movemw %d1,FPTEMP_EX(%a6)
- moveml %d2,FPTEMP_HI(%a6)
- moveml %d3,FPTEMP_LO(%a6)
- fmovel USER_FPCR(%a6),%FPCR
- fmovex FPTEMP(%a6),%fp0
- moveml (%a7)+,%d2/%d3
- rts
-
-|
-| Destination remained denormalized. Call t_resdnrm with
-| exceptional operand in ETEMP.
-|
-dst_fin:
- tstb L_SCR1(%a6) |check for sign
- beqs dst_exit
- orl #neg_mask,USER_FPSR(%a6) |set N
- orl #0x8000,%d1
-dst_exit:
- movemw %d1,ETEMP_EX(%a6)
- moveml %d2,ETEMP_HI(%a6)
- moveml %d3,ETEMP_LO(%a6)
- orl #unfl_mask,USER_FPSR(%a6) |set unfl
- moveml (%a7)+,%d2/%d3
- leal ETEMP(%a6),%a0
- bra t_resdnrm
-
-|
-| Source is outside of 2^14 range. Test the sign and branch
-| to the appropriate exception handler.
-|
-src_out:
- tstb L_SCR1(%a6)
- beqs scro_pos
- orl #0x8000,%d1
-scro_pos:
- movel FPTEMP_HI(%a6),ETEMP_HI(%a6)
- movel FPTEMP_LO(%a6),ETEMP_LO(%a6)
- tstw ETEMP(%a6)
- blts res_neg
-res_pos:
- movew %d1,ETEMP(%a6) |result in ETEMP
- bra t_ovfl2
-res_neg:
- movew %d1,ETEMP(%a6) |result in ETEMP
- leal ETEMP(%a6),%a0
- bra t_unfl
- |end
diff --git a/arch/m68k/fpsp040/scosh.S b/arch/m68k/fpsp040/scosh.S
deleted file mode 100644
index 07d3a4d7c86d5c7fb4c1527c47b18c04c7770125..0000000000000000000000000000000000000000
--- a/arch/m68k/fpsp040/scosh.S
+++ /dev/null
@@ -1,131 +0,0 @@
-|
-| scosh.sa 3.1 12/10/90
-|
-| The entry point sCosh computes the hyperbolic cosine of
-| an input argument; sCoshd does the same except for denormalized
-| input.
-|
-| Input: Double-extended number X in location pointed to
-| by address register a0.
-|
-| Output: The value cosh(X) returned in floating-point register Fp0.
-|
-| Accuracy and Monotonicity: The returned result is within 3 ulps in
-| 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
-| result is subsequently rounded to double precision. The
-| result is provably monotonic in double precision.
-|
-| Speed: The program sCOSH takes approximately 250 cycles.
-|
-| Algorithm:
-|
-| COSH
-| 1. If |X| > 16380 log2, go to 3.
-|
-| 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
-| y = |X|, z = exp(y), and
-| cosh(X) = (1/2)*( z + 1/z ).
-| Exit.
-|
-| 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
-|
-| 4. (16380 log2 < |X| <= 16480 log2)
-| cosh(X) = exp(|X|)/2.
-| However, invoking exp(|X|) may cause premature overflow.
-| Thus, we calculate cosh(X) as follows:
-| Y := |X|
-| Fact := 2**(16380)
-| Y' := Y - 16381 log2
-| cosh(X) := Fact * exp(Y').
-| Exit.
-|
-| 5. (|X| > 16480 log2) cosh(X) must overflow.
Return -| Huge*Huge to generate overflow and an infinity with -| the appropriate sign. Huge is the largest finite number in -| extended format. Exit. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SCOSH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_ovfl - |xref t_frcinx - |xref setox - -T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD -T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL - -TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000 - - .global scoshd -scoshd: -|--COSH(X) = 1 FOR DENORMALIZED X - - fmoves #0x3F800000,%fp0 - - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 - bra t_frcinx - - .global scosh -scosh: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - cmpil #0x400CB167,%d0 - bgts COSHBIG - -|--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) - - fabsx %fp0 | ...|X| - - movel %d1,-(%sp) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) |pass parameter to setox - bsr setox | ...FP0 IS EXP(|X|) - fmuls #0x3F000000,%fp0 | ...(1/2)EXP(|X|) - movel (%sp)+,%d1 - - fmoves #0x3E800000,%fp1 | ...(1/4) - fdivx %fp0,%fp1 | ...1/(2 EXP(|X|)) - - fmovel %d1,%FPCR - faddx %fp1,%fp0 - - bra t_frcinx - -COSHBIG: - cmpil #0x400CB2B3,%d0 - bgts COSHHUGE - - fabsx %fp0 - fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD) - fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE - - movel %d1,-(%sp) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setox - fmovel (%sp)+,%fpcr - - fmulx TWO16380(%pc),%fp0 - bra t_frcinx - -COSHHUGE: - fmovel #0,%fpsr |clr N bit if set by source - bclrb #7,(%a0) |always return positive value - fmovemx (%a0),%fp0-%fp0 - bra t_ovfl - - |end diff --git a/arch/m68k/fpsp040/setox.S b/arch/m68k/fpsp040/setox.S deleted file mode 100644 index f1acf7e36d6b826c77c167efdefaa5729b140977..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/setox.S +++ /dev/null @@ -1,864 +0,0 @@ -| -| setox.sa 3.1 12/10/90 -| -| The entry point setox computes the exponential of a value. -| setoxd does the same except the input value is a denormalized -| number. setoxm1 computes exp(X)-1, and setoxm1d computes -| exp(X)-1 for denormalized X. -| -| INPUT -| ----- -| Double-extended value in memory location pointed to by address -| register a0. -| -| OUTPUT -| ------ -| exp(X) or exp(X)-1 returned in floating-point register fp0. -| -| ACCURACY and MONOTONICITY -| ------------------------- -| The returned result is within 0.85 ulps in 64 significant bit, i.e. -| within 0.5001 ulp to 53 bits if the result is subsequently rounded -| to double precision. The result is provably monotonic in double -| precision. -| -| SPEED -| ----- -| Two timings are measured, both in the copy-back mode. The -| first one is measured when the function is invoked the first time -| (so the instructions and data are not in cache), and the -| second one is measured when the function is reinvoked at the same -| input argument. -| -| The program setox takes approximately 210/190 cycles for input -| argument X whose magnitude is less than 16380 log2, which -| is the usual situation. For the less common arguments, -| depending on their values, the program may run faster or slower -- -| but no worse than 10% slower even in the extreme cases. -| -| The program setoxm1 takes approximately ??? / ??? cycles for input -| argument X, 0.25 <= |X| < 70log2. 
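The scosh routine above reduces cosh to a single exp call plus an overflow fix-up. A minimal C sketch of the same three-way split, assuming IEEE double, so the cutoffs 700 and 710 below are double-precision stand-ins for 16380 log2 and 16480 log2, not the constants the 040 code compares against:

#include <math.h>

/*
 * Sketch of the scosh strategy in IEEE double.  The cutoffs are
 * double-precision stand-ins for 16380 log2 / 16480 log2.
 */
static double cosh_sketch(double x)
{
        double y = fabs(x);

        if (y <= 700.0) {                       /* step 2: the usual case */
                double z = exp(y);
                return 0.5 * (z + 1.0 / z);     /* (1/2)*(z + 1/z)        */
        }
        if (y <= 710.0)                         /* step 4: exp(y) would   */
                return exp(y - M_LN2);          /* overflow, exp(y)/2 not */
        return HUGE_VAL * HUGE_VAL;             /* step 5: force overflow */
}

In the middle band exp(y) itself would overflow even though cosh(x) is still finite; the assembly handles that by pre-subtracting 16381 log2 and multiplying by 2**16380, which is the extended-precision version of folding the factor 1/2 into the argument as the sketch does.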
For |X| < 0.25, it takes -| approximately ??? / ??? cycles. For the less common arguments, -| depending on their values, the program may run faster or slower -- -| but no worse than 10% slower even in the extreme cases. -| -| ALGORITHM and IMPLEMENTATION NOTES -| ---------------------------------- -| -| setoxd -| ------ -| Step 1. Set ans := 1.0 -| -| Step 2. Return ans := ans + sign(X)*2^(-126). Exit. -| Notes: This will always generate one exception -- inexact. -| -| -| setox -| ----- -| -| Step 1. Filter out extreme cases of input argument. -| 1.1 If |X| >= 2^(-65), go to Step 1.3. -| 1.2 Go to Step 7. -| 1.3 If |X| < 16380 log(2), go to Step 2. -| 1.4 Go to Step 8. -| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. -| To avoid the use of floating-point comparisons, a -| compact representation of |X| is used. This format is a -| 32-bit integer, the upper (more significant) 16 bits are -| the sign and biased exponent field of |X|; the lower 16 -| bits are the 16 most significant fraction (including the -| explicit bit) bits of |X|. Consequently, the comparisons -| in Steps 1.1 and 1.3 can be performed by integer comparison. -| Note also that the constant 16380 log(2) used in Step 1.3 -| is also in the compact form. Thus taking the branch -| to Step 2 guarantees |X| < 16380 log(2). There is no harm -| to have a small number of cases where |X| is less than, -| but close to, 16380 log(2) and the branch to Step 9 is -| taken. -| -| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). -| 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken) -| 2.2 N := round-to-nearest-integer( X * 64/log2 ). -| 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63. -| 2.4 Calculate M = (N - J)/64; so N = 64M + J. -| 2.5 Calculate the address of the stored value of 2^(J/64). -| 2.6 Create the value Scale = 2^M. -| Notes: The calculation in 2.2 is really performed by -| -| Z := X * constant -| N := round-to-nearest-integer(Z) -| -| where -| -| constant := single-precision( 64/log 2 ). -| -| Using a single-precision constant avoids memory access. -| Another effect of using a single-precision "constant" is -| that the calculated value Z is -| -| Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). -| -| This error has to be considered later in Steps 3 and 4. -| -| Step 3. Calculate X - N*log2/64. -| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). -| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). -| Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate -| the value -log2/64 to 88 bits of accuracy. -| b) N*L1 is exact because N is no longer than 22 bits and -| L1 is no longer than 24 bits. -| c) The calculation X+N*L1 is also exact due to cancellation. -| Thus, R is practically X+N(L1+L2) to full 64 bits. -| d) It is important to estimate how large can |R| be after -| Step 3.2. -| -| N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) -| X*64/log2 (1+eps) = N + f, |f| <= 0.5 -| X*64/log2 - N = f - eps*X 64/log2 -| X - N*log2/64 = f*log2/64 - eps*X -| -| -| Now |X| <= 16446 log2, thus -| -| |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 -| <= 0.57 log2/64. -| This bound will be used in Step 4. -| -| Step 4. Approximate exp(R)-1 by a polynomial -| p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: A1 (which is 1/2), A4 and A5 -| are single precision; A2 and A3 are double precision. 
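Steps 2-4, together with the Step 5/6 reconstruction described next, amount to the following double-precision sketch. It is illustrative only: exp2() stands in for the stored table of 2^(J/64) values, a short Taylor series stands in for the A1..A5 coefficients, and none of the filtering of Steps 1 and 7-9 is present.

#include <math.h>

/*
 * Double-precision sketch of setox Steps 2-6.  L1 is -log2/64 rounded
 * to single precision so that n * L1 is exact, as in note b) of Step 3;
 * L2 carries the tail of -log2/64.
 */
static double exp_sketch(double x)
{
        const double L1 = (float)(-M_LN2 / 64.0);       /* short lead   */
        const double L2 = -M_LN2 / 64.0 - L1;           /* tail         */
        double n, r, s, p, t;
        int j, m;

        n = nearbyint(x * (64.0 / M_LN2));      /* Step 2: N            */
        j = (int)n & 63;                        /* J = N mod 64         */
        m = ((int)n - j) / 64;                  /* M = (N - J)/64       */

        r = (x + n * L1) + n * L2;              /* Step 3: R            */

        s = r * r;                              /* Step 4: exp(R)-1     */
        p = r + s * (0.5 + r * (1.0 / 6 + r * (1.0 / 24 + r / 120)));

        t = exp2(j / 64.0);                     /* Step 5: 2^(J/64)     */
        return ldexp(t + t * p, m);             /* Step 6: scale by 2^M */
}

The assembly keeps the table value as a high part T and a low part t and adds them separately in Step 5 so the extra bits survive; collapsing T+t into one exp2() call is the main accuracy shortcut this sketch takes.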
-| b) Even with the restrictions above, -| |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. -| Note that 0.0062 is slightly bigger than 0.57 log2/64. -| c) To fully utilize the pipeline, p is separated into -| two independent pieces of roughly equal complexities -| p = [ R + R*S*(A2 + S*A4) ] + -| [ S*(A1 + S*(A3 + S*A5)) ] -| where S = R*R. -| -| Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by -| ans := T + ( T*p + t) -| where T and t are the stored values for 2^(J/64). -| Notes: 2^(J/64) is stored as T and t where T+t approximates -| 2^(J/64) to roughly 85 bits; T is in extended precision -| and t is in single precision. Note also that T is rounded -| to 62 bits so that the last two bits of T are zero. The -| reason for such a special form is that T-1, T-2, and T-8 -| will all be exact --- a property that will give much -| more accurate computation of the function EXPM1. -| -| Step 6. Reconstruction of exp(X) -| exp(X) = 2^M * 2^(J/64) * exp(R). -| 6.1 If AdjFlag = 0, go to 6.3 -| 6.2 ans := ans * AdjScale -| 6.3 Restore the user FPCR -| 6.4 Return ans := ans * Scale. Exit. -| Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, -| |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will -| neither overflow nor underflow. If AdjFlag = 1, that -| means that -| X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. -| Hence, exp(X) may overflow or underflow or neither. -| When that is the case, AdjScale = 2^(M1) where M1 is -| approximately M. Thus 6.2 will never cause over/underflow. -| Possible exception in 6.4 is overflow or underflow. -| The inexact exception is not generated in 6.4. Although -| one can argue that the inexact flag should always be -| raised, to simulate that exception cost to much than the -| flag is worth in practical uses. -| -| Step 7. Return 1 + X. -| 7.1 ans := X -| 7.2 Restore user FPCR. -| 7.3 Return ans := 1 + ans. Exit -| Notes: For non-zero X, the inexact exception will always be -| raised by 7.3. That is the only exception raised by 7.3. -| Note also that we use the FMOVEM instruction to move X -| in Step 7.1 to avoid unnecessary trapping. (Although -| the FMOVEM may not seem relevant since X is normalized, -| the precaution will be useful in the library version of -| this code where the separate entry for denormalized inputs -| will be done away with.) -| -| Step 8. Handle exp(X) where |X| >= 16380log2. -| 8.1 If |X| > 16480 log2, go to Step 9. -| (mimic 2.2 - 2.6) -| 8.2 N := round-to-integer( X * 64/log2 ) -| 8.3 Calculate J = N mod 64, J = 0,1,...,63 -| 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1. -| 8.5 Calculate the address of the stored value 2^(J/64). -| 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. -| 8.7 Go to Step 3. -| Notes: Refer to notes for 2.2 - 2.6. -| -| Step 9. Handle exp(X), |X| > 16480 log2. -| 9.1 If X < 0, go to 9.3 -| 9.2 ans := Huge, go to 9.4 -| 9.3 ans := Tiny. -| 9.4 Restore user FPCR. -| 9.5 Return ans := ans * ans. Exit. -| Notes: Exp(X) will surely overflow or underflow, depending on -| X's sign. "Huge" and "Tiny" are respectively large/tiny -| extended-precision numbers whose square over/underflow -| with an inexact result. Thus, 9.5 always raises the -| inexact together with either overflow or underflow. -| -| -| setoxm1d -| -------- -| -| Step 1. Set ans := 0 -| -| Step 2. Return ans := X + ans. Exit. -| Notes: This will return X with the appropriate rounding -| precision prescribed by the user FPCR. -| -| setoxm1 -| ------- -| -| Step 1. Check |X| -| 1.1 If |X| >= 1/4, go to Step 1.3. 
-| 1.2 Go to Step 7. -| 1.3 If |X| < 70 log(2), go to Step 2. -| 1.4 Go to Step 10. -| Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. -| However, it is conceivable |X| can be small very often -| because EXPM1 is intended to evaluate exp(X)-1 accurately -| when |X| is small. For further details on the comparisons, -| see the notes on Step 1 of setox. -| -| Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). -| 2.1 N := round-to-nearest-integer( X * 64/log2 ). -| 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63. -| 2.3 Calculate M = (N - J)/64; so N = 64M + J. -| 2.4 Calculate the address of the stored value of 2^(J/64). -| 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M). -| Notes: See the notes on Step 2 of setox. -| -| Step 3. Calculate X - N*log2/64. -| 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). -| 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). -| Notes: Applying the analysis of Step 3 of setox in this case -| shows that |R| <= 0.0055 (note that |X| <= 70 log2 in -| this case). -| -| Step 4. Approximate exp(R)-1 by a polynomial -| p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: A1 (which is 1/2), A5 and A6 -| are single precision; A2, A3 and A4 are double precision. -| b) Even with the restriction above, -| |p - (exp(R)-1)| < |R| * 2^(-72.7) -| for all |R| <= 0.0055. -| c) To fully utilize the pipeline, p is separated into -| two independent pieces of roughly equal complexity -| p = [ R*S*(A2 + S*(A4 + S*A6)) ] + -| [ R + S*(A1 + S*(A3 + S*A5)) ] -| where S = R*R. -| -| Step 5. Compute 2^(J/64)*p by -| p := T*p -| where T and t are the stored values for 2^(J/64). -| Notes: 2^(J/64) is stored as T and t where T+t approximates -| 2^(J/64) to roughly 85 bits; T is in extended precision -| and t is in single precision. Note also that T is rounded -| to 62 bits so that the last two bits of T are zero. The -| reason for such a special form is that T-1, T-2, and T-8 -| will all be exact --- a property that will be exploited -| in Step 6 below. The total relative error in p is no -| bigger than 2^(-67.7) compared to the final result. -| -| Step 6. Reconstruction of exp(X)-1 -| exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). -| 6.1 If M <= 63, go to Step 6.3. -| 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 -| 6.3 If M >= -3, go to 6.5. -| 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 -| 6.5 ans := (T + OnebySc) + (p + t). -| 6.6 Restore user FPCR. -| 6.7 Return ans := Sc * ans. Exit. -| Notes: The various arrangements of the expressions give accurate -| evaluations. -| -| Step 7. exp(X)-1 for |X| < 1/4. -| 7.1 If |X| >= 2^(-65), go to Step 9. -| 7.2 Go to Step 8. -| -| Step 8. Calculate exp(X)-1, |X| < 2^(-65). -| 8.1 If |X| < 2^(-16312), goto 8.3 -| 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit. -| 8.3 X := X * 2^(140). -| 8.4 Restore FPCR; ans := ans - 2^(-16382). -| Return ans := ans*2^(140). Exit -| Notes: The idea is to return "X - tiny" under the user -| precision and rounding modes. To avoid unnecessary -| inefficiency, we stay away from denormalized numbers the -| best we can. For |X| >= 2^(-16312), the straightforward -| 8.2 generates the inexact exception as the case warrants. -| -| Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial -| p = X + X*X*(B1 + X*(B2 + ... 
+ X*B12)) -| Notes: a) In order to reduce memory access, the coefficients are -| made as "short" as possible: B1 (which is 1/2), B9 to B12 -| are single precision; B3 to B8 are double precision; and -| B2 is double extended. -| b) Even with the restriction above, -| |p - (exp(X)-1)| < |X| 2^(-70.6) -| for all |X| <= 0.251. -| Note that 0.251 is slightly bigger than 1/4. -| c) To fully preserve accuracy, the polynomial is computed -| as X + ( S*B1 + Q ) where S = X*X and -| Q = X*S*(B2 + X*(B3 + ... + X*B12)) -| d) To fully utilize the pipeline, Q is separated into -| two independent pieces of roughly equal complexity -| Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + -| [ S*S*(B3 + S*(B5 + ... + S*B11)) ] -| -| Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. -| 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical -| purposes. Therefore, go to Step 1 of setox. -| 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes. -| ans := -1 -| Restore user FPCR -| Return ans := ans + 2^(-126). Exit. -| Notes: 10.2 will always create an inexact and return -1 + tiny -| in the user rounding precision and mode. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|setox idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -L2: .long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 - -EXPA3: .long 0x3FA55555,0x55554431 -EXPA2: .long 0x3FC55555,0x55554018 - -HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 -TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 - -EM1A4: .long 0x3F811111,0x11174385 -EM1A3: .long 0x3FA55555,0x55554F5A - -EM1A2: .long 0x3FC55555,0x55555555,0x00000000,0x00000000 - -EM1B8: .long 0x3EC71DE3,0xA5774682 -EM1B7: .long 0x3EFA01A0,0x19D7CB68 - -EM1B6: .long 0x3F2A01A0,0x1A019DF3 -EM1B5: .long 0x3F56C16C,0x16C170E2 - -EM1B4: .long 0x3F811111,0x11111111 -EM1B3: .long 0x3FA55555,0x55555555 - -EM1B2: .long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB - .long 0x00000000 - -TWO140: .long 0x48B00000,0x00000000 -TWON140: .long 0x37300000,0x00000000 - -EXPTBL: - .long 0x3FFF0000,0x80000000,0x00000000,0x00000000 - .long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B - .long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 - .long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 - .long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C - .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F - .long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 - .long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF - .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF - .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA - .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 - .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 - .long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 - .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 - .long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D - .long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 - .long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD - .long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 - .long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 - .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D - .long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 - .long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C - .long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 - .long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 - .long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 - .long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA - .long 
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A - .long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC - .long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC - .long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 - .long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 - .long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A - .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 - .long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 - .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC - .long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 - .long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 - .long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 - .long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 - .long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B - .long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 - .long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E - .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 - .long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D - .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 - .long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C - .long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 - .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 - .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F - .long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F - .long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 - .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 - .long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B - .long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 - .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A - .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 - .long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 - .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B - .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 - .long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 - .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 - .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 - .long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 - .long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A - - .set ADJFLAG,L_SCR2 - .set SCALE,FP_SCR1 - .set ADJSCALE,FP_SCR2 - .set SC,FP_SCR3 - .set ONEBYSC,FP_SCR4 - - | xref t_frcinx - |xref t_extdnrm - |xref t_unfl - |xref t_ovfl - - .global setoxd -setoxd: -|--entry point for EXP(X), X is denormalized - movel (%a0),%d0 - andil #0x80000000,%d0 - oril #0x00800000,%d0 | ...sign(X)*2^(-126) - movel %d0,-(%sp) - fmoves #0x3F800000,%fp0 - fmovel %d1,%fpcr - fadds (%sp)+,%fp0 - bra t_frcinx - - .global setox -setox: -|--entry point for EXP(X), here X is finite, non-zero, and not NaN's - -|--Step 1. - movel (%a0),%d0 | ...load part of input X - andil #0x7FFF0000,%d0 | ...biased expo. of X - cmpil #0x3FBE0000,%d0 | ...2^(-65) - bges EXPC1 | ...normal case - bra EXPSM - -EXPC1: -|--The case |X| >= 2^(-65) - movew 4(%a0),%d0 | ...expo. and partial sig. of |X| - cmpil #0x400CB167,%d0 | ...16380 log2 trunc. 16 bits - blts EXPMAIN | ...normal case - bra EXPBIG - -EXPMAIN: -|--Step 2. -|--This is the normal branch: 2^(-65) <= |X| < 16380 log2. - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - movel #0,ADJFLAG(%a6) - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is M - addiw #0x3FFF,%d0 | ...biased expo. 
of 2^(M) - movew L2,L_SCR1(%a6) | ...prefetch L2, no need in CB - -EXPCONT1: -|--Step 3. -|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -|--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) - fmovex %fp0,%fp2 - fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) - fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 - faddx %fp1,%fp0 | ...X + N*L1 - faddx %fp2,%fp0 | ...fp0 is R, reduced arg. -| MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache - -|--Step 4. -|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...fp1 IS S = R*R - - fmoves #0x3AB60B70,%fp2 | ...fp2 IS A5 -| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache - - fmulx %fp1,%fp2 | ...fp2 IS S*A5 - fmovex %fp1,%fp3 - fmuls #0x3C088895,%fp3 | ...fp3 IS S*A4 - - faddd EXPA3,%fp2 | ...fp2 IS A3+S*A5 - faddd EXPA2,%fp3 | ...fp3 IS A2+S*A4 - - fmulx %fp1,%fp2 | ...fp2 IS S*(A3+S*A5) - movew %d0,SCALE(%a6) | ...SCALE is 2^(M) in extended - clrw SCALE+2(%a6) - movel #0x80000000,SCALE+4(%a6) - clrl SCALE+8(%a6) - - fmulx %fp1,%fp3 | ...fp3 IS S*(A2+S*A4) - - fadds #0x3F000000,%fp2 | ...fp2 IS A1+S*(A3+S*A5) - fmulx %fp0,%fp3 | ...fp3 IS R*S*(A2+S*A4) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A1+S*(A3+S*A5)) - faddx %fp3,%fp0 | ...fp0 IS R+R*S*(A2+S*A4), -| ...fp3 released - - fmovex (%a1)+,%fp1 | ...fp1 is lead. pt. of 2^(J/64) - faddx %fp2,%fp0 | ...fp0 is EXP(R) - 1 -| ...fp2 released - -|--Step 5 -|--final reconstruction process -|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) - - fmulx %fp1,%fp0 | ...2^(J/64)*(Exp(R)-1) - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - fadds (%a1),%fp0 | ...accurate 2^(J/64) - - faddx %fp1,%fp0 | ...2^(J/64) + 2^(J/64)*... - movel ADJFLAG(%a6),%d0 - -|--Step 6 - tstl %d0 - beqs NORMAL -ADJUST: - fmulx ADJSCALE(%a6),%fp0 -NORMAL: - fmovel %d1,%FPCR | ...restore user FPCR - fmulx SCALE(%a6),%fp0 | ...multiply 2^(M) - bra t_frcinx - -EXPSM: -|--Step 7 - fmovemx (%a0),%fp0-%fp0 | ...in case X is denormalized - fmovel %d1,%FPCR - fadds #0x3F800000,%fp0 | ...1+X in user mode - bra t_frcinx - -EXPBIG: -|--Step 8 - cmpil #0x400CB27C,%d0 | ...16480 log2 - bgts EXP2BIG -|--Steps 8.2 -- 8.6 - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - movel #1,ADJFLAG(%a6) - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is K - movel %d0,L_SCR1(%a6) | ...save K temporarily - asrl #1,%d0 | ...D0 is M1 - subl %d0,L_SCR1(%a6) | ...a1 is M - addiw #0x3FFF,%d0 | ...biased expo. of 2^(M1) - movew %d0,ADJSCALE(%a6) | ...ADJSCALE := 2^(M1) - clrw ADJSCALE+2(%a6) - movel #0x80000000,ADJSCALE+4(%a6) - clrl ADJSCALE+8(%a6) - movel L_SCR1(%a6),%d0 | ...D0 is M - addiw #0x3FFF,%d0 | ...biased expo. of 2^(M) - bra EXPCONT1 | ...go back to Step 3 - -EXP2BIG: -|--Step 9 - fmovel %d1,%FPCR - movel (%a0),%d0 - bclrb #sign_bit,(%a0) | ...setox always returns positive - cmpil #0,%d0 - blt t_unfl - bra t_ovfl - - .global setoxm1d -setoxm1d: -|--entry point for EXPM1(X), here X is denormalized -|--Step 0. 
- bra t_extdnrm - - - .global setoxm1 -setoxm1: -|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN - -|--Step 1. -|--Step 1.1 - movel (%a0),%d0 | ...load part of input X - andil #0x7FFF0000,%d0 | ...biased expo. of X - cmpil #0x3FFD0000,%d0 | ...1/4 - bges EM1CON1 | ...|X| >= 1/4 - bra EM1SM - -EM1CON1: -|--Step 1.3 -|--The case |X| >= 1/4 - movew 4(%a0),%d0 | ...expo. and partial sig. of |X| - cmpil #0x4004C215,%d0 | ...70log2 rounded up to 16 bits - bles EM1MAIN | ...1/4 <= |X| <= 70log2 - bra EM1BIG - -EM1MAIN: -|--Step 2. -|--This is the case: 1/4 <= |X| <= 70 log2. - fmovex (%a0),%fp0 | ...load input from (a0) - - fmovex %fp0,%fp1 - fmuls #0x42B8AA3B,%fp0 | ...64/log2 * X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 -| MOVE.W #$3F81,EM1A4 ...prefetch in CB mode - fmovel %fp0,%d0 | ...N = int( X * 64/log2 ) - lea EXPTBL,%a1 - fmovel %d0,%fp0 | ...convert to floating-format - - movel %d0,L_SCR1(%a6) | ...save N temporarily - andil #0x3F,%d0 | ...D0 is J = N mod 64 - lsll #4,%d0 - addal %d0,%a1 | ...address of 2^(J/64) - movel L_SCR1(%a6),%d0 - asrl #6,%d0 | ...D0 is M - movel %d0,L_SCR1(%a6) | ...save a copy of M -| MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode - -|--Step 3. -|--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -|--a0 points to 2^(J/64), D0 and a1 both contain M - fmovex %fp0,%fp2 - fmuls #0xBC317218,%fp0 | ...N * L1, L1 = lead(-log2/64) - fmulx L2,%fp2 | ...N * L2, L1+L2 = -log2/64 - faddx %fp1,%fp0 | ...X + N*L1 - faddx %fp2,%fp0 | ...fp0 is R, reduced arg. -| MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache - addiw #0x3FFF,%d0 | ...D0 is biased expo. of 2^M - -|--Step 4. -|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) -|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...fp1 IS S = R*R - - fmoves #0x3950097B,%fp2 | ...fp2 IS a6 -| MOVE.W #0,2(%a1) ...load 2^(J/64) in cache - - fmulx %fp1,%fp2 | ...fp2 IS S*A6 - fmovex %fp1,%fp3 - fmuls #0x3AB60B6A,%fp3 | ...fp3 IS S*A5 - - faddd EM1A4,%fp2 | ...fp2 IS A4+S*A6 - faddd EM1A3,%fp3 | ...fp3 IS A3+S*A5 - movew %d0,SC(%a6) | ...SC is 2^(M) in extended - clrw SC+2(%a6) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A4+S*A6) - movel L_SCR1(%a6),%d0 | ...D0 is M - negw %d0 | ...D0 is -M - fmulx %fp1,%fp3 | ...fp3 IS S*(A3+S*A5) - addiw #0x3FFF,%d0 | ...biased expo. of 2^(-M) - faddd EM1A2,%fp2 | ...fp2 IS A2+S*(A4+S*A6) - fadds #0x3F000000,%fp3 | ...fp3 IS A1+S*(A3+S*A5) - - fmulx %fp1,%fp2 | ...fp2 IS S*(A2+S*(A4+S*A6)) - oriw #0x8000,%d0 | ...signed/expo. 
of -2^(-M) - movew %d0,ONEBYSC(%a6) | ...OnebySc is -2^(-M) - clrw ONEBYSC+2(%a6) - movel #0x80000000,ONEBYSC+4(%a6) - clrl ONEBYSC+8(%a6) - fmulx %fp3,%fp1 | ...fp1 IS S*(A1+S*(A3+S*A5)) -| ...fp3 released - - fmulx %fp0,%fp2 | ...fp2 IS R*S*(A2+S*(A4+S*A6)) - faddx %fp1,%fp0 | ...fp0 IS R+S*(A1+S*(A3+S*A5)) -| ...fp1 released - - faddx %fp2,%fp0 | ...fp0 IS EXP(R)-1 -| ...fp2 released - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - -|--Step 5 -|--Compute 2^(J/64)*p - - fmulx (%a1),%fp0 | ...2^(J/64)*(Exp(R)-1) - -|--Step 6 -|--Step 6.1 - movel L_SCR1(%a6),%d0 | ...retrieve M - cmpil #63,%d0 - bles MLE63 -|--Step 6.2 M >= 64 - fmoves 12(%a1),%fp1 | ...fp1 is t - faddx ONEBYSC(%a6),%fp1 | ...fp1 is t+OnebySc - faddx %fp1,%fp0 | ...p+(t+OnebySc), fp1 released - faddx (%a1),%fp0 | ...T+(p+(t+OnebySc)) - bras EM1SCALE -MLE63: -|--Step 6.3 M <= 63 - cmpil #-3,%d0 - bges MGEN3 -MLTN3: -|--Step 6.4 M <= -4 - fadds 12(%a1),%fp0 | ...p+t - faddx (%a1),%fp0 | ...T+(p+t) - faddx ONEBYSC(%a6),%fp0 | ...OnebySc + (T+(p+t)) - bras EM1SCALE -MGEN3: -|--Step 6.5 -3 <= M <= 63 - fmovex (%a1)+,%fp1 | ...fp1 is T - fadds (%a1),%fp0 | ...fp0 is p+t - faddx ONEBYSC(%a6),%fp1 | ...fp1 is T+OnebySc - faddx %fp1,%fp0 | ...(T+OnebySc)+(p+t) - -EM1SCALE: -|--Step 6.6 - fmovel %d1,%FPCR - fmulx SC(%a6),%fp0 - - bra t_frcinx - -EM1SM: -|--Step 7 |X| < 1/4. - cmpil #0x3FBE0000,%d0 | ...2^(-65) - bges EM1POLY - -EM1TINY: -|--Step 8 |X| < 2^(-65) - cmpil #0x00330000,%d0 | ...2^(-16312) - blts EM12TINY -|--Step 8.2 - movel #0x80010000,SC(%a6) | ...SC is -2^(-16382) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - fmovex (%a0),%fp0 - fmovel %d1,%FPCR - faddx SC(%a6),%fp0 - - bra t_frcinx - -EM12TINY: -|--Step 8.3 - fmovex (%a0),%fp0 - fmuld TWO140,%fp0 - movel #0x80010000,SC(%a6) - movel #0x80000000,SC+4(%a6) - clrl SC+8(%a6) - faddx SC(%a6),%fp0 - fmovel %d1,%FPCR - fmuld TWON140,%fp0 - - bra t_frcinx - -EM1POLY: -|--Step 9 exp(X)-1 by a simple polynomial - fmovex (%a0),%fp0 | ...fp0 is X - fmulx %fp0,%fp0 | ...fp0 is S := X*X - fmovemx %fp2-%fp2/%fp3,-(%a7) | ...save fp2 - fmoves #0x2F30CAA8,%fp1 | ...fp1 is B12 - fmulx %fp0,%fp1 | ...fp1 is S*B12 - fmoves #0x310F8290,%fp2 | ...fp2 is B11 - fadds #0x32D73220,%fp1 | ...fp1 is B10+S*B12 - - fmulx %fp0,%fp2 | ...fp2 is S*B11 - fmulx %fp0,%fp1 | ...fp1 is S*(B10 + ... - - fadds #0x3493F281,%fp2 | ...fp2 is B9+S*... - faddd EM1B8,%fp1 | ...fp1 is B8+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B9+... - fmulx %fp0,%fp1 | ...fp1 is S*(B8+... - - faddd EM1B7,%fp2 | ...fp2 is B7+S*... - faddd EM1B6,%fp1 | ...fp1 is B6+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B7+... - fmulx %fp0,%fp1 | ...fp1 is S*(B6+... - - faddd EM1B5,%fp2 | ...fp2 is B5+S*... - faddd EM1B4,%fp1 | ...fp1 is B4+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B5+... - fmulx %fp0,%fp1 | ...fp1 is S*(B4+... - - faddd EM1B3,%fp2 | ...fp2 is B3+S*... - faddx EM1B2,%fp1 | ...fp1 is B2+S*... - - fmulx %fp0,%fp2 | ...fp2 is S*(B3+... - fmulx %fp0,%fp1 | ...fp1 is S*(B2+... - - fmulx %fp0,%fp2 | ...fp2 is S*S*(B3+...) - fmulx (%a0),%fp1 | ...fp1 is X*S*(B2... 
- - fmuls #0x3F000000,%fp0 | ...fp0 is S*B1 - faddx %fp2,%fp1 | ...fp1 is Q -| ...fp2 released - - fmovemx (%a7)+,%fp2-%fp2/%fp3 | ...fp2 restored - - faddx %fp1,%fp0 | ...fp0 is S*B1+Q -| ...fp1 released - - fmovel %d1,%FPCR - faddx (%a0),%fp0 - - bra t_frcinx - -EM1BIG: -|--Step 10 |X| > 70 log2 - movel (%a0),%d0 - cmpil #0,%d0 - bgt EXPC1 -|--Step 10.2 - fmoves #0xBF800000,%fp0 | ...fp0 is -1 - fmovel %d1,%FPCR - fadds #0x00800000,%fp0 | ...-1 + 2^(-126) - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/sgetem.S b/arch/m68k/fpsp040/sgetem.S deleted file mode 100644 index d9234f4aed57c8801c2a23b99fe4a2d91bdd2ec5..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sgetem.S +++ /dev/null @@ -1,140 +0,0 @@ -| -| sgetem.sa 3.1 12/10/90 -| -| The entry point sGETEXP returns the exponent portion -| of the input argument. The exponent bias is removed -| and the exponent value is returned as an extended -| precision number in fp0. sGETEXPD handles denormalized -| numbers. -| -| The entry point sGETMAN extracts the mantissa of the -| input argument. The mantissa is converted to an -| extended precision number and returned in fp0. The -| range of the result is [1.0 - 2.0). -| -| -| Input: Double-extended number X in the ETEMP space in -| the floating-point save stack. -| -| Output: The functions return exp(X) or man(X) in fp0. -| -| Modified: fp0. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SGETEM idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref nrm_set - -| -| This entry point is used by the unimplemented instruction exception -| handler. It points a0 to the input operand. -| -| -| -| SGETEXP -| - - .global sgetexp -sgetexp: - movew LOCAL_EX(%a0),%d0 |get the exponent - bclrl #15,%d0 |clear the sign bit - subw #0x3fff,%d0 |subtract off the bias - fmovew %d0,%fp0 |move the exp to fp0 - rts - - .global sgetexpd -sgetexpd: - bclrb #sign_bit,LOCAL_EX(%a0) - bsr nrm_set |normalize (exp will go negative) - movew LOCAL_EX(%a0),%d0 |load resulting exponent into d0 - subw #0x3fff,%d0 |subtract off the bias - fmovew %d0,%fp0 |move the exp to fp0 - rts -| -| -| This entry point is used by the unimplemented instruction exception -| handler. It points a0 to the input operand. -| -| -| -| SGETMAN -| -| -| For normalized numbers, leave the mantissa alone, simply load -| with an exponent of +/- $3fff. -| - .global sgetman -sgetman: - movel USER_FPCR(%a6),%d0 - andil #0xffffff00,%d0 |clear rounding precision and mode - fmovel %d0,%fpcr |this fpcr setting is used by the 882 - movew LOCAL_EX(%a0),%d0 |get the exp (really just want sign bit) - orw #0x7fff,%d0 |clear old exp - bclrl #14,%d0 |make it the new exp +-3fff - movew %d0,LOCAL_EX(%a0) |move the sign & exp back to fsave stack - fmovex (%a0),%fp0 |put new value back in fp0 - rts - -| -| For denormalized numbers, shift the mantissa until the j-bit = 1, -| then load the exponent with +/1 $3fff. -| - .global sgetmand -sgetmand: - movel LOCAL_HI(%a0),%d0 |load ms mant in d0 - movel LOCAL_LO(%a0),%d1 |load ls mant in d1 - bsr shft |shift mantissa bits till msbit is set - movel %d0,LOCAL_HI(%a0) |put ms mant back on stack - movel %d1,LOCAL_LO(%a0) |put ls mant back on stack - bras sgetman - -| -| SHFT -| -| Shifts the mantissa bits until msbit is set. 
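Before the shft details, here is what the sgetexp/sgetman pair above computes, sketched on IEEE doubles. The FPSP itself works on 80-bit extended values with an explicit integer bit, and the denormalized entries (sgetexpd/sgetmand) first normalize the input; the sketch assumes a normalized, finite argument.

#include <stdint.h>
#include <string.h>

/*
 * getexp: the unbiased exponent, returned as a floating-point value.
 * getman: the mantissa rescaled into sign(x) * [1.0, 2.0).
 */
static double getexp_sketch(double x)
{
        uint64_t b;

        memcpy(&b, &x, sizeof b);
        return (double)((int)((b >> 52) & 0x7ff) - 1023);
}

static double getman_sketch(double x)
{
        uint64_t b;

        memcpy(&b, &x, sizeof b);
        b &= ~0x7ff0000000000000ull;    /* clear the old exponent     */
        b |=  0x3ff0000000000000ull;    /* force it to the bias (2^0) */
        memcpy(&x, &b, sizeof x);
        return x;
}

The orw #0x7fff / bclrl #14 pair in sgetman is the extended-precision version of the same exponent splice: set all fifteen exponent bits, then clear bit 14, leaving exactly the bias $3fff.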
-| input: -| ms mantissa part in d0 -| ls mantissa part in d1 -| output: -| shifted bits in d0 and d1 -shft: - tstl %d0 |if any bits set in ms mant - bnes upper |then branch -| ;else no bits set in ms mant - tstl %d1 |test if any bits set in ls mant - bnes cont |if set then continue - bras shft_end |else return -cont: - movel %d3,-(%a7) |save d3 - exg %d0,%d1 |shift ls mant to ms mant - bfffo %d0{#0:#32},%d3 |find first 1 in ls mant to d0 - lsll %d3,%d0 |shift first 1 to integer bit in ms mant - movel (%a7)+,%d3 |restore d3 - bras shft_end -upper: - - moveml %d3/%d5/%d6,-(%a7) |save registers - bfffo %d0{#0:#32},%d3 |find first 1 in ls mant to d0 - lsll %d3,%d0 |shift ms mant until j-bit is set - movel %d1,%d6 |save ls mant in d6 - lsll %d3,%d1 |shift ls mant by count - movel #32,%d5 - subl %d3,%d5 |sub 32 from shift for ls mant - lsrl %d5,%d6 |shift off all bits but those that will -| ;be shifted into ms mant - orl %d6,%d0 |shift the ls mant bits into the ms mant - moveml (%a7)+,%d3/%d5/%d6 |restore registers -shft_end: - rts - - |end diff --git a/arch/m68k/fpsp040/sint.S b/arch/m68k/fpsp040/sint.S deleted file mode 100644 index 0e92d4e5d231b4687348f83a79317560288fe218..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sint.S +++ /dev/null @@ -1,246 +0,0 @@ -| -| sint.sa 3.1 12/10/90 -| -| The entry point sINT computes the rounded integer -| equivalent of the input argument, sINTRZ computes -| the integer rounded to zero of the input argument. -| -| Entry points sint and sintrz are called from do_func -| to emulate the fint and fintrz unimplemented instructions, -| respectively. Entry point sintdo is used by bindec. -| -| Input: (Entry points sint and sintrz) Double-extended -| number X in the ETEMP space in the floating-point -| save stack. -| (Entry point sintdo) Double-extended number X in -| location pointed to by the address register a0. -| (Entry point sintd) Double-extended denormalized -| number X in the ETEMP space in the floating-point -| save stack. -| -| Output: The function returns int(X) or intrz(X) in fp0. -| -| Modifies: fp0. -| -| Algorithm: (sint and sintrz) -| -| 1. If exp(X) >= 63, return X. -| If exp(X) < 0, return +/- 0 or +/- 1, according to -| the rounding mode. -| -| 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the -| result to the exponent $403e. -| -| 3. Round the result in the mode given in USER_FPCR. For -| sintrz, force round-to-zero mode. -| -| 4. Normalize the rounded result; store in fp0. -| -| For the denormalized cases, force the correct result -| for the given sign and rounding mode. -| -| Sign(X) -| RMODE + - -| ----- -------- -| RN +0 -0 -| RZ +0 -0 -| RM +0 -1 -| RP +1 -0 -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SINT idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref dnrm_lp - |xref nrm_set - |xref round - |xref t_inx2 - |xref ld_pone - |xref ld_mone - |xref ld_pzero - |xref ld_mzero - |xref snzrinx - -| -| FINT -| - .global sint -sint: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding -| ;implicitly has extend precision -| ;in upper word. - movel %d1,L_SCR1(%a6) |save mode bits - bras sintexc - -| -| FINT with extended denorm inputs. 
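The shft helper above maps directly onto 64-bit integer code; a C sketch of the same normalization, with a plain loop in place of the bfffo-computed shift count:

#include <stdint.h>

/*
 * Sketch of shft: shift the 64-bit mantissa held as two 32-bit halves
 * left until its msbit (the j-bit) is set.  The assembly derives the
 * whole shift count at once with bfffo; the loop is the plain form.
 */
static void shft_sketch(uint32_t *hi, uint32_t *lo)
{
        uint64_t m = ((uint64_t)*hi << 32) | *lo;

        if (m == 0)
                return;                 /* nothing to normalize */
        while (!(m >> 63))
                m <<= 1;                /* drive the j-bit to the top */
        *hi = (uint32_t)(m >> 32);
        *lo = (uint32_t)m;
}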
-| - .global sintd -sintd: - btstb #5,FPCR_MODE(%a6) - beq snzrinx |if round nearest or round zero, +/- 0 - btstb #4,FPCR_MODE(%a6) - beqs rnd_mns -rnd_pls: - btstb #sign_bit,LOCAL_EX(%a0) - bnes sintmz - bsr ld_pone |if round plus inf and pos, answer is +1 - bra t_inx2 -rnd_mns: - btstb #sign_bit,LOCAL_EX(%a0) - beqs sintpz - bsr ld_mone |if round mns inf and neg, answer is -1 - bra t_inx2 -sintpz: - bsr ld_pzero - bra t_inx2 -sintmz: - bsr ld_mzero - bra t_inx2 - -| -| FINTRZ -| - .global sintrz -sintrz: - movel #1,L_SCR1(%a6) |use rz mode for rounding -| ;implicitly has extend precision -| ;in upper word. - bras sintexc -| -| SINTDO -| -| Input: a0 points to an IEEE extended format operand -| Output: fp0 has the result -| -| Exceptions: -| -| If the subroutine results in an inexact operation, the inx2 and -| ainx bits in the USER_FPSR are set. -| -| - .global sintdo -sintdo: - bfextu FPCR_MODE(%a6){#2:#2},%d1 |use user's mode for rounding -| ;implicitly has ext precision -| ;in upper word. - movel %d1,L_SCR1(%a6) |save mode bits -| -| Real work of sint is in sintexc -| -sintexc: - bclrb #sign_bit,LOCAL_EX(%a0) |convert to internal extended -| ;format - sne LOCAL_SGN(%a0) - cmpw #0x403e,LOCAL_EX(%a0) |check if (unbiased) exp > 63 - bgts out_rnge |branch if exp < 63 - cmpw #0x3ffd,LOCAL_EX(%a0) |check if (unbiased) exp < 0 - bgt in_rnge |if 63 >= exp > 0, do calc -| -| Input is less than zero. Restore sign, and check for directed -| rounding modes. L_SCR1 contains the rmode in the lower byte. -| -un_rnge: - btstb #1,L_SCR1+3(%a6) |check for rn and rz - beqs un_rnrz - tstb LOCAL_SGN(%a0) |check for sign - bnes un_rmrp_neg -| -| Sign is +. If rp, load +1.0, if rm, load +0.0 -| - cmpib #3,L_SCR1+3(%a6) |check for rp - beqs un_ldpone |if rp, load +1.0 - bsr ld_pzero |if rm, load +0.0 - bra t_inx2 -un_ldpone: - bsr ld_pone - bra t_inx2 -| -| Sign is -. If rm, load -1.0, if rp, load -0.0 -| -un_rmrp_neg: - cmpib #2,L_SCR1+3(%a6) |check for rm - beqs un_ldmone |if rm, load -1.0 - bsr ld_mzero |if rp, load -0.0 - bra t_inx2 -un_ldmone: - bsr ld_mone - bra t_inx2 -| -| Rmode is rn or rz; return signed zero -| -un_rnrz: - tstb LOCAL_SGN(%a0) |check for sign - bnes un_rnrz_neg - bsr ld_pzero - bra t_inx2 -un_rnrz_neg: - bsr ld_mzero - bra t_inx2 - -| -| Input is greater than 2^63. All bits are significant. Return -| the input. 
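In C99 terms, sint behaves like rint() under the caller's rounding mode and sintrz like trunc(); a sketch, with the fenv.h FE_* macros standing in for the FPCR mode bits (a strict build would also want #pragma STDC FENV_ACCESS ON):

#include <fenv.h>
#include <math.h>

/*
 * fint: round to integer in the user's rounding mode.  rint() raises
 * FE_INEXACT when bits are discarded, matching the inx2/ainx behaviour.
 * fintrz: always round toward zero, whatever the mode.
 */
static double fint_sketch(double x, int fe_mode)
{
        int old = fegetround();
        double r;

        fesetround(fe_mode);
        r = rint(x);            /* |x| >= 2^52 is already integral */
        fesetround(old);
        return r;
}

static double fintrz_sketch(double x)
{
        return trunc(x);
}

The denormalized-input table above falls out of the same rule: a positive denormal rounds to +1 under RP and to +0 otherwise, and a negative one rounds to -1 under RM and to -0 otherwise.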
-| -out_rnge: - bfclr LOCAL_SGN(%a0){#0:#8} |change back to IEEE ext format - beqs intps - bsetb #sign_bit,LOCAL_EX(%a0) -intps: - fmovel %fpcr,-(%sp) - fmovel #0,%fpcr - fmovex LOCAL_EX(%a0),%fp0 |if exp > 63 -| ;then return X to the user -| ;there are no fraction bits - fmovel (%sp)+,%fpcr - rts - -in_rnge: -| ;shift off fraction bits - clrl %d0 |clear d0 - initial g,r,s for -| ;dnrm_lp - movel #0x403e,%d1 |set threshold for dnrm_lp -| ;assumes a0 points to operand - bsr dnrm_lp -| ;returns unnormalized number -| ;pointed by a0 -| ;output d0 supplies g,r,s -| ;used by round - movel L_SCR1(%a6),%d1 |use selected rounding mode -| -| - bsr round |round the unnorm based on users -| ;input a0 ptr to ext X -| ; d0 g,r,s bits -| ; d1 PREC/MODE info -| ;output a0 ptr to rounded result -| ;inexact flag set in USER_FPSR -| ;if initial grs set -| -| normalize the rounded result and store value in fp0 -| - bsr nrm_set |normalize the unnorm -| ;Input: a0 points to operand to -| ;be normalized -| ;Output: a0 points to normalized -| ;result - bfclr LOCAL_SGN(%a0){#0:#8} - beqs nrmrndp - bsetb #sign_bit,LOCAL_EX(%a0) |return to IEEE extended format -nrmrndp: - fmovel %fpcr,-(%sp) - fmovel #0,%fpcr - fmovex LOCAL_EX(%a0),%fp0 |move result to fp0 - fmovel (%sp)+,%fpcr - rts - - |end diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S deleted file mode 100644 index a8f41615d94a7283db6638117c59eb91b41f8df8..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/skeleton.S +++ /dev/null @@ -1,513 +0,0 @@ -| -| skeleton.sa 3.2 4/26/91 -| -| This file contains code that is system dependent and will -| need to be modified to install the FPSP. -| -| Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'. -| Put any target system specific handling that must be done immediately -| before the jump instruction. If there no handling necessary, then -| the 'fpsp_xxxx' handler entry point should be placed in the exception -| table so that the 'jmp' can be eliminated. If the FPSP determines that the -| exception is one that must be reported then there will be a -| return from the package by a 'jmp real_xxxx'. At that point -| the machine state will be identical to the state before -| the FPSP was entered. In particular, whatever condition -| that caused the exception will still be pending when the FPSP -| package returns. Thus, there will be system specific code -| to handle the exception. -| -| If the exception was completely handled by the package, then -| the return will be via a 'jmp fpsp_done'. Unless there is -| OS specific work to be done (such as handling a context switch or -| interrupt) the user program can be resumed via 'rte'. -| -| In the following skeleton code, some typical 'real_xxxx' handling -| code is shown. This code may need to be moved to an appropriate -| place in the target system, or rewritten. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -| -| Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk) -| - -#include -#include -#include - -|SKELETON idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 15 -| -| The following counters are used for standalone testing -| - - |section 8 - -#include "fpsp.h" - - |xref b1238_fix - -| -| Divide by Zero exception -| -| All dz exceptions are 'real', hence no fpsp_dz entry point. 
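Schematically, the entry-point contract described in the header above is a two-way dispatch; in C, with function pointers as stand-ins for the assembly entry points:

/*
 * Schematic version of the skeleton's flow: the vector enters the
 * package, and the package either finishes the job (fpsp_done -> rte)
 * or bounces the still-pending condition back to the system-specific
 * real_xxxx handler with the machine state untouched.
 */
enum fpsp_result { FPSP_DONE, FPSP_REAL };

static void fp_vector_sketch(enum fpsp_result (*fpsp_xxxx)(void),
                             void (*real_xxxx)(void))
{
        if (fpsp_xxxx() == FPSP_REAL)
                real_xxxx();    /* exception still pending; OS acts */
        /* otherwise resume the interrupted program (the rte path)  */
}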
-| - .global dz - .global real_dz -dz: -real_dz: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Inexact exception -| -| All inexact exceptions are real, but the 'real' handler -| will probably want to clear the pending exception. -| The provided code will clear the E3 exception (if pending), -| otherwise clear the E1 exception. The frestore is not really -| necessary for E1 exceptions. -| -| Code following the 'inex' label is to handle bug #1232. In this -| bug, if an E1 snan, ovfl, or unfl occurred, and the process was -| swapped out before taking the exception, the exception taken on -| return was inex, rather than the correct exception. The snan, ovfl, -| and unfl exception to be taken must not have been enabled. The -| fix is to check for E1, and the existence of one of snan, ovfl, -| or unfl bits set in the fpsr. If any of these are set, branch -| to the appropriate handler for the exception in the fpsr. Note -| that this fix is only for d43b parts, and is skipped if the -| version number is not $40. -| -| - .global real_inex - .global inex -inex: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - cmpib #VER_40,(%sp) |test version number - bnes not_fmt40 - fmovel %fpsr,-(%sp) - btstb #E1,E_BYTE(%a6) |test for E1 set - beqs not_b1232 - btstb #snan_bit,2(%sp) |test for snan - beq inex_ckofl - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra snan -inex_ckofl: - btstb #ovfl_bit,2(%sp) |test for ovfl - beq inex_ckufl - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra ovfl -inex_ckufl: - btstb #unfl_bit,2(%sp) |test for unfl - beq not_b1232 - addl #4,%sp - frestore (%sp)+ - unlk %a6 - bra unfl - -| -| We do not have the bug 1232 case. Clean up the stack and call -| real_inex. -| -not_b1232: - addl #4,%sp - frestore (%sp)+ - unlk %a6 - -real_inex: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) -not_fmt40: - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - beqs inex_cke1 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. 
-| - moveml %d0/%d1,USER_DA(%a6) - bfextu CMDREG1B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - moveml USER_DA(%a6),%d0/%d1 - bras inex_done -inex_cke1: - bclrb #E1,E_BYTE(%a6) -inex_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Overflow exception -| - |xref fpsp_ovfl - .global real_ovfl - .global ovfl -ovfl: - jmp fpsp_ovfl -real_ovfl: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - bnes ovfl_done - bclrb #E1,E_BYTE(%a6) -ovfl_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Underflow exception -| - |xref fpsp_unfl - .global real_unfl - .global unfl -unfl: - jmp fpsp_unfl -real_unfl: - - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E3,E_BYTE(%a6) |clear and test E3 flag - bnes unfl_done - bclrb #E1,E_BYTE(%a6) -unfl_done: - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Signalling NAN exception -| - |xref fpsp_snan - .global real_snan - .global snan -snan: - jmp fpsp_snan -real_snan: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |snan is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Operand Error exception -| - |xref fpsp_operr - .global real_operr - .global operr -operr: - jmp fpsp_operr -real_operr: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |operr is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - - -| -| BSUN exception -| -| This sample handler simply clears the nan bit in the FPSR. -| - |xref fpsp_bsun - .global real_bsun - .global bsun -bsun: - jmp fpsp_bsun -real_bsun: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |bsun is always an E1 exception - fmovel %FPSR,-(%sp) - bclrb #nan_bit,(%sp) - fmovel (%sp)+,%FPSR - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| F-line exception -| -| A 'real' F-line exception is one that the FPSP isn't supposed to -| handle. E.g. an instruction with a co-processor ID that is not 1. 
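Restated in C, the bug-1232 test performed on the inex path above is a straightforward priority classification; the arguments are illustrative stand-ins for the frame version, the E1 bit, and the snan/ovfl/unfl bits read from the saved fpsr:

enum fp_exc { EXC_INEX, EXC_SNAN, EXC_OVFL, EXC_UNFL };

/* Sketch of the bug-1232 re-dispatch: same test order as the assembly. */
static enum fp_exc inex_classify_sketch(int ver40, int e1,
                                        int snan, int ovfl, int unfl)
{
        if (ver40 && e1) {              /* only $40-version (d43b) frames */
                if (snan)
                        return EXC_SNAN;
                if (ovfl)
                        return EXC_OVFL;
                if (unfl)
                        return EXC_UNFL;
        }
        return EXC_INEX;                /* a genuine inexact */
}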
-| -| - |xref fpsp_fline - .global real_fline - .global fline -fline: - jmp fpsp_fline -real_fline: - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Unsupported data type exception -| - |xref fpsp_unsupp - .global real_unsupp - .global unsupp -unsupp: - jmp fpsp_unsupp -real_unsupp: - link %a6,#-LOCAL_SIZE - fsave -(%sp) - bclrb #E1,E_BYTE(%a6) |unsupp is always an E1 exception - frestore (%sp)+ - unlk %a6 - - SAVE_ALL_INT - GET_CURRENT(%d0) - movel %sp,%sp@- | stack frame pointer argument - bsrl trap_c - addql #4,%sp - bral ret_from_exception - -| -| Trace exception -| - .global real_trace -real_trace: - | - bral trap - -| -| fpsp_fmt_error --- exit point for frame format error -| -| The fpu stack frame does not match the frames existing -| or planned at the time of this writing. The fpsp is -| unable to handle frame sizes not in the following -| version:size pairs: -| -| {4060, 4160} - busy frame -| {4028, 4130} - unimp frame -| {4000, 4100} - idle frame -| -| This entry point simply holds an f-line illegal value. -| Replace this with a call to your kernel panic code or -| code to handle future revisions of the fpu. -| - .global fpsp_fmt_error -fpsp_fmt_error: - - .long 0xf27f0000 |f-line illegal - -| -| fpsp_done --- FPSP exit point -| -| The exception has been handled by the package and we are ready -| to return to user mode, but there may be OS specific code -| to execute before we do. If there is, do it now. -| -| - - .global fpsp_done -fpsp_done: - btst #0x5,%sp@ | supervisor bit set in saved SR? - beq .Lnotkern - rte -.Lnotkern: - SAVE_ALL_INT - GET_CURRENT(%d0) - | deliver signals, reschedule etc.. - jra ret_from_exception - -| -| mem_write --- write to user or supervisor address space -| -| Writes to memory while in supervisor mode. copyout accomplishes -| this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function. -| If you don't have copyout, use the local copy of the function below. -| -| a0 - supervisor source address -| a1 - user destination address -| d0 - number of bytes to write (maximum count is 12) -| -| The supervisor source address is guaranteed to point into the supervisor -| stack. The result is that a UNIX -| process is allowed to sleep as a consequence of a page fault during -| copyout. The probability of a page fault is exceedingly small because -| the 68040 always reads the destination address and thus the page -| faults should have already been handled. -| -| If the EXC_SR shows that the exception was from supervisor space, -| then just do a dumb (and slow) memory move. In a UNIX environment -| there shouldn't be any supervisor mode floating point exceptions. -| - .global mem_write -mem_write: - btstb #5,EXC_SR(%a6) |check for supervisor state - beqs user_write -super_write: - moveb (%a0)+,(%a1)+ - subql #1,%d0 - bnes super_write - rts -user_write: - movel %d1,-(%sp) |preserve d1 just in case - movel %d0,-(%sp) - movel %a1,-(%sp) - movel %a0,-(%sp) - jsr copyout - addw #12,%sp - movel (%sp)+,%d1 - rts -| -| mem_read --- read from user or supervisor address space -| -| Reads from memory while in supervisor mode. copyin accomplishes -| this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function. -| If you don't have copyin, use the local copy of the function below. -| -| The FPSP calls mem_read to read the original F-line instruction in order -| to extract the data register number when the 'Dn' addressing mode is -| used. 
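Both mem_write above and mem_read below fall back to the same bounded byte loop when the access stays in supervisor space; a C sketch of that loop (a Linux port would use copy_to_user()/copy_from_user() for the user-space cases instead of the moves-based copyout/copyin):

#include <stddef.h>

/*
 * The dumb supervisor-to-supervisor fallback copy, and the shape of the
 * local copyout/copyin loops: one byte per iteration, count <= 12.
 */
static void byte_copy_sketch(unsigned char *dst, const unsigned char *src,
                             size_t count)
{
        while (count--)
                *dst++ = *src++;        /* one moveb/movesb per byte */
}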
-| -|Input: -| a0 - user source address -| a1 - supervisor destination address -| d0 - number of bytes to read (maximum count is 12) -| -| Like mem_write, mem_read always reads with a supervisor -| destination address on the supervisor stack. Also like mem_write, -| the EXC_SR is checked and a simple memory copy is done if reading -| from supervisor space is indicated. -| - .global mem_read -mem_read: - btstb #5,EXC_SR(%a6) |check for supervisor state - beqs user_read -super_read: - moveb (%a0)+,(%a1)+ - subql #1,%d0 - bnes super_read - rts -user_read: - movel %d1,-(%sp) |preserve d1 just in case - movel %d0,-(%sp) - movel %a1,-(%sp) - movel %a0,-(%sp) - jsr copyin - addw #12,%sp - movel (%sp)+,%d1 - rts - -| -| Use these routines if your kernel doesn't have copyout/copyin equivalents. -| Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC, -| and copyin overwrites SFC. -| -copyout: - movel 4(%sp),%a0 | source - movel 8(%sp),%a1 | destination - movel 12(%sp),%d0 | count - subl #1,%d0 | dec count by 1 for dbra - movel #1,%d1 - -| DFC is already set -| movec %d1,%DFC | set dfc for user data space -moreout: - moveb (%a0)+,%d1 | fetch supervisor byte -out_ea: - movesb %d1,(%a1)+ | write user byte - dbf %d0,moreout - rts - -copyin: - movel 4(%sp),%a0 | source - movel 8(%sp),%a1 | destination - movel 12(%sp),%d0 | count - subl #1,%d0 | dec count by 1 for dbra - movel #1,%d1 -| SFC is already set -| movec %d1,%SFC | set sfc for user space -morein: -in_ea: - movesb (%a0)+,%d1 | fetch user byte - moveb %d1,(%a1)+ | write supervisor byte - dbf %d0,morein - rts - - .section .fixup,#alloc,#execinstr - .even -1: - jbra fpsp040_die - - .section __ex_table,#alloc - .align 4 - - .long in_ea,1b - .long out_ea,1b - - |end diff --git a/arch/m68k/fpsp040/slog2.S b/arch/m68k/fpsp040/slog2.S deleted file mode 100644 index fac2c738382e87c2031055d4f773593a8e797557..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/slog2.S +++ /dev/null @@ -1,187 +0,0 @@ -| -| slog2.sa 3.1 12/10/90 -| -| The entry point slog10 computes the base-10 -| logarithm of an input argument X. -| slog10d does the same except the input value is a -| denormalized number. -| sLog2 and sLog2d are the base-2 analogues. -| -| INPUT: Double-extended value in memory location pointed to -| by address register a0. -| -| OUTPUT: log_10(X) or log_2(X) returned in floating-point -| register fp0. -| -| ACCURACY and MONOTONICITY: The returned result is within 1.7 -| ulps in 64 significant bit, i.e. within 0.5003 ulp -| to 53 bits if the result is subsequently rounded -| to double precision. The result is provably monotonic -| in double precision. -| -| SPEED: Two timings are measured, both in the copy-back mode. -| The first one is measured when the function is invoked -| the first time (so the instructions and data are not -| in cache), and the second one is measured when the -| function is reinvoked at the same input argument. -| -| ALGORITHM and IMPLEMENTATION NOTES: -| -| slog10d: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call slognd to obtain Y = log(X), the natural log of X. -| Notes: Even if X is denormalized, log(X) is always normalized. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(10)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L10. -| -| -| slog10: -| -| Step 0. 
If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call sLogN to obtain Y = log(X), the natural log of X. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(10)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L10. -| -| -| sLog2d: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. Call slognd to obtain Y = log(X), the natural log of X. -| Notes: Even if X is denormalized, log(X) is always normalized. -| -| Step 2. Compute log_10(X) = log(X) * (1/log(2)). -| 2.1 Restore the user FPCR -| 2.2 Return ans := Y * INV_L2. -| -| -| sLog2: -| -| Step 0. If X < 0, create a NaN and raise the invalid operation -| flag. Otherwise, save FPCR in D1; set FpCR to default. -| Notes: Default means round-to-nearest mode, no floating-point -| traps, and precision control = double extended. -| -| Step 1. If X is not an integer power of two, i.e., X != 2^k, -| go to Step 3. -| -| Step 2. Return k. -| 2.1 Get integer k, X = 2^k. -| 2.2 Restore the user FPCR. -| 2.3 Return ans := convert-to-double-extended(k). -| -| Step 3. Call sLogN to obtain Y = log(X), the natural log of X. -| -| Step 4. Compute log_2(X) = log(X) * (1/log(2)). -| 4.1 Restore the user FPCR -| 4.2 Return ans := Y * INV_L2. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SLOG2 idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref t_frcinx - |xref t_operr - |xref slogn - |xref slognd - -INV_L10: .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 - -INV_L2: .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 - - .global slog10d -slog10d: -|--entry point for Log10(X), X is denormalized - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slognd | ...log(X), X denorm. - fmovel (%sp)+,%fpcr - fmulx INV_L10,%fp0 - bra t_frcinx - - .global slog10 -slog10: -|--entry point for Log10(X), X is normalized - - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slogn | ...log(X), X normal. - fmovel (%sp)+,%fpcr - fmulx INV_L10,%fp0 - bra t_frcinx - - - .global slog2d -slog2d: -|--entry point for Log2(X), X is denormalized - - movel (%a0),%d0 - blt invalid - movel %d1,-(%sp) - clrl %d1 - bsr slognd | ...log(X), X denorm. - fmovel (%sp)+,%fpcr - fmulx INV_L2,%fp0 - bra t_frcinx - - .global slog2 -slog2: -|--entry point for Log2(X), X is normalized - movel (%a0),%d0 - blt invalid - - movel 8(%a0),%d0 - bnes continue | ...X is not 2^k - - movel 4(%a0),%d0 - andl #0x7FFFFFFF,%d0 - tstl %d0 - bnes continue - -|--X = 2^k. - movew (%a0),%d0 - andl #0x00007FFF,%d0 - subl #0x3FFF,%d0 - fmovel %d1,%fpcr - fmovel %d0,%fp0 - bra t_frcinx - -continue: - movel %d1,-(%sp) - clrl %d1 - bsr slogn | ...log(X), X normal. 
- fmovel (%sp)+,%fpcr - fmulx INV_L2,%fp0 - bra t_frcinx - -invalid: - bra t_operr - - |end diff --git a/arch/m68k/fpsp040/slogn.S b/arch/m68k/fpsp040/slogn.S deleted file mode 100644 index d98eaf641ec4c65b6997ba01b309958120aaeb17..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/slogn.S +++ /dev/null @@ -1,591 +0,0 @@ -| -| slogn.sa 3.1 12/10/90 -| -| slogn computes the natural logarithm of an -| input value. slognd does the same except the input value is a -| denormalized number. slognp1 computes log(1+X), and slognp1d -| computes log(1+X) for denormalized X. -| -| Input: Double-extended value in memory location pointed to by address -| register a0. -| -| Output: log(X) or log(1+X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program slogn takes approximately 190 cycles for input -| argument X such that |X-1| >= 1/16, which is the usual -| situation. For those arguments, slognp1 takes approximately -| 210 cycles. For the less common arguments, the program will -| run no worse than 10% slower. -| -| Algorithm: -| LOGN: -| Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in -| u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. -| -| Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven -| significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base -| 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). -| -| Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, -| log(1+u) = poly. -| -| Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) -| by k*log(2) + (log(F) + poly). The values of log(F) are calculated -| beforehand and stored in the program. -| -| lognp1: -| Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in -| u where u = 2X/(2+X). Otherwise, move on to Step 2. -| -| Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 -| of the algorithm for LOGN and compute log(1+X) as -| k*log(2) + log(F) + poly where poly approximates log(1+u), -| u = (Y-F)/F. -| -| Implementation Notes: -| Note 1. There are 64 different possible values for F, thus 64 log(F)'s -| need to be tabulated. Moreover, the values of 1/F are also -| tabulated so that the division in (Y-F)/F can be performed by a -| multiplication. -| -| Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value -| Y-F has to be calculated carefully when 1/2 <= X < 3/2. -| -| Note 3. To fully exploit the pipeline, polynomials are usually separated -| into two parts evaluated independently before being added up. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
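As a cross-check on the LOGN/LOGNP1 header above, here is a hedged C sketch of the same table-driven decomposition, in plain doubles rather than the 68040's double-extended format. `logn_sketch` is a hypothetical name; `log1p()` stands in for the degree-six polynomial, and log(F) and 1/F are computed on the fly where the assembly fetches them from LOGTBL:

```c
#include <math.h>

/* Sketch only: LOGN's Steps 1-4 in C doubles. */
static double logn_sketch(double x)     /* x finite, positive, normalized */
{
    int k;
    double y = 2.0 * frexp(x, &k);      /* 1 <= y < 2 */
    k--;                                /* now x = y * 2**k */

    if (fabs(x - 1.0) < 1.0 / 16.0) {   /* Step 1: X near 1 */
        double u = 2.0 * (x - 1.0) / (x + 1.0);
        /* odd series in u: log(1+u/2) - log(1-u/2) = log(x) */
        return log1p(u / 2.0) - log1p(-u / 2.0);
    }

    /* Step 2: F = 1.xxxxxx1 in binary, the first seven bits of y
     * plus 2**-7, so |y - F| <= 2**-7 */
    double f = floor(y * 64.0) / 64.0 + 1.0 / 128.0;
    double u = (y - f) / f;             /* Step 3: |u| tiny */

    /* Step 4: k*log2 + log(F) + log(1+u); the real code reads log(F)
     * and 1/F from LOGTBL and evaluates a fixed polynomial for log1p */
    return k * M_LN2 + log(f) + log1p(u);
}
```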
- -|slogn idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 -BOUNDS2: .long 0x3FFE8000,0x3FFFC000 - -LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -one: .long 0x3F800000 -zero: .long 0x00000000 -infty: .long 0x7F800000 -negone: .long 0xBF800000 - -LOGA6: .long 0x3FC2499A,0xB5E4040B -LOGA5: .long 0xBFC555B5,0x848CB7DB - -LOGA4: .long 0x3FC99999,0x987D8730 -LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 - -LOGA2: .long 0x3FD55555,0x555555a4 -LOGA1: .long 0xBFE00000,0x00000008 - -LOGB5: .long 0x3F175496,0xADD7DAD6 -LOGB4: .long 0x3F3C71C2,0xFE80C7E0 - -LOGB3: .long 0x3F624924,0x928BCCFF -LOGB2: .long 0x3F899999,0x999995EC - -LOGB1: .long 0x3FB55555,0x55555555 -TWO: .long 0x40000000,0x00000000 - -LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 - -LOGTBL: - .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 - .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 - .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 - .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 - .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 - .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 - .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 - .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 - .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 - .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 - .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 - .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 - .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 - .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 - .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 - .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 - .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 - .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 - .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 - .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 - .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 - .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 - .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 - .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 - .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 - .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 - .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 - .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 - .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 - .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 - .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 - .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 - .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 - .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 - .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 - .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 - .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 - .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 - .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 - .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 - .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 - .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 - .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 - .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 - .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 - .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 - .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 - .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 - .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 - .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 - .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 - .long 
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 - .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 - .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 - .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 - .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 - .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 - .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 - .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 - .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 - .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 - .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 - .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 - .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 - .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 - .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 - .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 - .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 - .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 - .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 - .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 - .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 - .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 - .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 - .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 - .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 - .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 - .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 - .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 - .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 - .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 - .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 - .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 - .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 - .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 - .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 - .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 - .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 - .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 - .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 - .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 - .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 - .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 - .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 - .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 - .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 - .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 - .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 - .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 - .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 - .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 - .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 - .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 - .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 - .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 - .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 - .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 - .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 - .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 - .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 - .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 - .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 - .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 - .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 - .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 - .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 - .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 - .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 - .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 - .long 
0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 - .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 - .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 - .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 - .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 - .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 - .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 - .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 - .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 - - .set ADJK,L_SCR1 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set F,FP_SCR2 - .set FFRAC,F+4 - - .set KLOG2,FP_SCR3 - - .set SAVEU,FP_SCR4 - - | xref t_frcinx - |xref t_extdnrm - |xref t_operr - |xref t_dz - - .global slognd -slognd: -|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT - - movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 - -|----normalize the input value by left shifting k bits (k to be determined -|----below), adjusting exponent and storing -k to ADJK -|----the value TWOTO100 is no longer needed. -|----Note that this code assumes the denormalized input is NON-ZERO. - - moveml %d2-%d7,-(%a7) | ...save some registers - movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # - movel 4(%a0),%d4 - movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) - clrl %d2 | ...D2 used for holding K - - tstl %d4 - bnes HiX_not0 - -HiX_0: - movel %d5,%d4 - clrl %d5 - movel #32,%d2 - clrl %d6 - bfffo %d4{#0:#32},%d6 - lsll %d6,%d4 - addl %d6,%d2 | ...(D3,D4,D5) is normalized - - movel %d3,X(%a6) - movel %d4,XFRAC(%a6) - movel %d5,XFRAC+4(%a6) - negl %d2 - movel %d2,ADJK(%a6) - fmovex X(%a6),%fp0 - moveml (%a7)+,%d2-%d7 | ...restore registers - lea X(%a6),%a0 - bras LOGBGN | ...begin regular log(X) - - -HiX_not0: - clrl %d6 - bfffo %d4{#0:#32},%d6 | ...find first 1 - movel %d6,%d2 | ...get k - lsll %d6,%d4 - movel %d5,%d7 | ...a copy of D5 - lsll %d6,%d5 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d4 | ...(D3,D4,D5) normalized - - movel %d3,X(%a6) - movel %d4,XFRAC(%a6) - movel %d5,XFRAC+4(%a6) - negl %d2 - movel %d2,ADJK(%a6) - fmovex X(%a6),%fp0 - moveml (%a7)+,%d2-%d7 | ...restore registers - lea X(%a6),%a0 - bras LOGBGN | ...begin regular log(X) - - - .global slogn -slogn: -|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - movel #0x00000000,ADJK(%a6) - -LOGBGN: -|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS -|--A FINITE, NON-ZERO, NORMALIZED NUMBER. - - movel (%a0),%d0 - movew 4(%a0),%d0 - - movel (%a0),X(%a6) - movel 4(%a0),X+4(%a6) - movel 8(%a0),X+8(%a6) - - cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE - blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID - cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 - bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16] - -LOGMAIN: -|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 - -|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. -|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. -|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) -|-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). -|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING -|--LOG(1+U) CAN BE VERY EFFICIENT. -|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO -|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. - -|--GET K, Y, F, AND ADDRESS OF 1/F. - asrl #8,%d0 - asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X - subil #0x3FFF,%d0 | ...THIS IS K - addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. 
- lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) - fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT - -|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F - movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X - movel XFRAC(%a6),FFRAC(%a6) - andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y - oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT - movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT - addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F - - fmovex X(%a6),%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...Y-F - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY -|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K -|--REGISTERS SAVED: FPCR, FP1, FP2 - -LP1CONT1: -|--AN RE-ENTRY POINT FOR LOGNP1 - fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F - fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY - fmovex %fp0,%fp2 - fmulx %fp2,%fp2 | ...FP2 IS V=U*U - fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 - -|--LOG(1+U) IS APPROXIMATED BY -|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS -|--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] - - fmovex %fp2,%fp3 - fmovex %fp2,%fp1 - - fmuld LOGA6,%fp1 | ...V*A6 - fmuld LOGA5,%fp2 | ...V*A5 - - faddd LOGA4,%fp1 | ...A4+V*A6 - faddd LOGA3,%fp2 | ...A3+V*A5 - - fmulx %fp3,%fp1 | ...V*(A4+V*A6) - fmulx %fp3,%fp2 | ...V*(A3+V*A5) - - faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) - faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) - - fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) - addal #16,%a0 | ...ADDRESS OF LOG(F) - fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED - - fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) - faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED - - faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) - fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 - faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) - - fmovel %d1,%fpcr - faddx KLOG2(%a6),%fp0 | ...FINAL ADD - bra t_frcinx - - -LOGNEAR1: -|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. - fmovex %fp0,%fp1 - fsubs one,%fp1 | ...FP1 IS X-1 - fadds one,%fp0 | ...FP0 IS X+1 - faddx %fp1,%fp1 | ...FP1 IS 2(X-1) -|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL -|--IN U, U = 2(X-1)/(X+1) = FP1/FP0 - -LP1CONT2: -|--THIS IS AN RE-ENTRY POINT FOR LOGNP1 - fdivx %fp0,%fp1 | ...FP1 IS U - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 -|--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 -|--LET V=U*U, W=V*V, CALCULATE -|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY -|--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) - fmovex %fp1,%fp0 - fmulx %fp0,%fp0 | ...FP0 IS V - fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS W - - fmoved LOGB5,%fp3 - fmoved LOGB4,%fp2 - - fmulx %fp1,%fp3 | ...W*B5 - fmulx %fp1,%fp2 | ...W*B4 - - faddd LOGB3,%fp3 | ...B3+W*B5 - faddd LOGB2,%fp2 | ...B2+W*B4 - - fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED - - fmulx %fp0,%fp2 | ...V*(B2+W*B4) - - faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) - fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V - - faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED - fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED - - fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - fmovel %d1,%fpcr - faddx SAVEU(%a6),%fp0 - bra t_frcinx - rts - -LOGNEG: -|--REGISTERS SAVED FPCR. 
LOG(-VE) IS INVALID - bra t_operr - - .global slognp1d -slognp1d: -|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT -| Simply return the denorm - - bra t_extdnrm - - .global slognp1 -slognp1: -|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S - - fmovex (%a0),%fp0 | ...LOAD INPUT - fabsx %fp0 |test magnitude - fcmpx LTHOLD,%fp0 |compare with min threshold - fbgt LP1REAL |if greater, continue - fmovel #0,%fpsr |clr N flag from compare - fmovel %d1,%fpcr - fmovex (%a0),%fp0 |return signed argument - bra t_frcinx - -LP1REAL: - fmovex (%a0),%fp0 | ...LOAD INPUT - movel #0x00000000,ADJK(%a6) - fmovex %fp0,%fp1 | ...FP1 IS INPUT Z - fadds one,%fp0 | ...X := ROUND(1+Z) - fmovex %fp0,X(%a6) - movew XFRAC(%a6),XDCARE(%a6) - movel X(%a6),%d0 - cmpil #0,%d0 - ble LP1NEG0 | ...LOG OF ZERO OR -VE - cmp2l BOUNDS2,%d0 - bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2] -|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, -|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, -|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). - -LP1NEAR1: -|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) - cmp2l BOUNDS1,%d0 - bcss LP1CARE - -LP1ONE16: -|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) -|--WHERE U = 2Z/(2+Z) = 2Z/(1+X). - faddx %fp1,%fp1 | ...FP1 IS 2Z - fadds one,%fp0 | ...FP0 IS 1+X -|--U = FP1/FP0 - bra LP1CONT2 - -LP1CARE: -|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE -|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST -|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], -|--THERE ARE ONLY TWO CASES. -|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z -|--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z -|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF -|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. - - movel XFRAC(%a6),FFRAC(%a6) - andil #0xFE000000,FFRAC(%a6) - oril #0x01000000,FFRAC(%a6) | ...F OBTAINED - cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 - bges KISZERO - -KISNEG1: - fmoves TWO,%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...2-F - movel FFRAC(%a6),%d0 - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F - faddx %fp1,%fp1 | ...GET 2Z - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 - faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z - lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F - addal %d0,%a0 - fmoves negone,%fp1 | ...FP1 IS K = -1 - bra LP1CONT1 - -KISZERO: - fmoves one,%fp0 - movel #0x3fff0000,F(%a6) - clrl F+8(%a6) - fsubx F(%a6),%fp0 | ...1-F - movel FFRAC(%a6),%d0 - andil #0x7E000000,%d0 - asrl #8,%d0 - asrl #8,%d0 - asrl #4,%d0 - faddx %fp1,%fp0 | ...FP0 IS Y-F - fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED - lea LOGTBL,%a0 - addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F - fmoves zero,%fp1 | ...FP1 IS K = 0 - bra LP1CONT1 - -LP1NEG0: -|--FPCR SAVED. D0 IS X IN COMPACT FORM. - cmpil #0,%d0 - blts LP1NEG -LP1ZERO: - fmoves negone,%fp0 - - fmovel %d1,%fpcr - bra t_dz - -LP1NEG: - fmoves zero,%fp0 - - fmovel %d1,%fpcr - bra t_operr - - |end diff --git a/arch/m68k/fpsp040/smovecr.S b/arch/m68k/fpsp040/smovecr.S deleted file mode 100644 index 73c36512081b1c66b56cb99f18fed73e8c736012..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/smovecr.S +++ /dev/null @@ -1,161 +0,0 @@ -| -| smovecr.sa 3.1 12/10/90 -| -| The entry point sMOVECR returns the constant at the -| offset given in the instruction field. -| -| Input: An offset in the instruction word. -| -| Output: The constant rounded to the user's rounding -| mode unchecked for overflow. 
-| -| Modified: fp0. -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SMOVECR idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref nrm_set - |xref round - |xref PIRN - |xref PIRZRM - |xref PIRP - |xref SMALRN - |xref SMALRZRM - |xref SMALRP - |xref BIGRN - |xref BIGRZRM - |xref BIGRP - -FZERO: .long 00000000 -| -| FMOVECR -| - .global smovcr -smovcr: - bfextu CMDREG1B(%a6){#9:#7},%d0 |get offset - bfextu USER_FPCR(%a6){#26:#2},%d1 |get rmode -| -| check range of offset -| - tstb %d0 |if zero, offset is to pi - beqs PI_TBL |it is pi - cmpib #0x0a,%d0 |check range $01 - $0a - bles Z_VAL |if in this range, return zero - cmpib #0x0e,%d0 |check range $0b - $0e - bles SM_TBL |valid constants in this range - cmpib #0x2f,%d0 |check range $10 - $2f - bles Z_VAL |if in this range, return zero - cmpib #0x3f,%d0 |check range $30 - $3f - ble BG_TBL |valid constants in this range -Z_VAL: - fmoves FZERO,%fp0 - rts -PI_TBL: - tstb %d1 |offset is zero, check for rmode - beqs PI_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs PI_RP |if 3, rp mode -PI_RZRM: - leal PIRZRM,%a0 |rmode is rz or rm, load PIRZRM in a0 - bra set_finx -PI_RN: - leal PIRN,%a0 |rmode is rn, load PIRN in a0 - bra set_finx -PI_RP: - leal PIRP,%a0 |rmode is rp, load PIRP in a0 - bra set_finx -SM_TBL: - subil #0xb,%d0 |make offset in 0 - 4 range - tstb %d1 |check for rmode - beqs SM_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs SM_RP |if 3, rp mode -SM_RZRM: - leal SMALRZRM,%a0 |rmode is rz or rm, load SMRZRM in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -SM_RN: - leal SMALRN,%a0 |rmode is rn, load SMRN in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -SM_RP: - leal SMALRP,%a0 |rmode is rp, load SMRP in a0 - cmpib #0x2,%d0 |check if result is inex - ble set_finx |if 0 - 2, it is inexact - bra no_finx |if 3, it is exact -BG_TBL: - subil #0x30,%d0 |make offset in 0 - f range - tstb %d1 |check for rmode - beqs BG_RN |if zero, rn mode - cmpib #0x3,%d1 |check for rp - beqs BG_RP |if 3, rp mode -BG_RZRM: - leal BIGRZRM,%a0 |rmode is rz or rm, load BGRZRM in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact - bra set_finx |if 8 - f, it is inexact -BG_RN: - leal BIGRN,%a0 |rmode is rn, load BGRN in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact - bra set_finx |if 8 - f, it is inexact -BG_RP: - leal BIGRP,%a0 |rmode is rp, load SMRP in a0 - cmpib #0x1,%d0 |check if result is inex - ble set_finx |if 0 - 1, it is inexact - cmpib #0x7,%d0 |second check - ble no_finx |if 0 - 7, it is exact -| bra set_finx ;if 8 - f, it is inexact -set_finx: - orl #inx2a_mask,USER_FPSR(%a6) |set inex2/ainex -no_finx: - mulul #12,%d0 |use offset to point into tables - movel %d1,L_SCR1(%a6) |load mode for round call - bfextu USER_FPCR(%a6){#24:#2},%d1 |get precision - tstl %d1 |check if extended precision -| -| Precision is extended -| - bnes not_ext |if extended, do not call round - fmovemx (%a0,%d0),%fp0-%fp0 |return result in fp0 - rts -| -| Precision is single or double -| -not_ext: - swap %d1 |rnd prec in upper word of d1 
- addl L_SCR1(%a6),%d1 |merge rmode in low word of d1 - movel (%a0,%d0),FP_SCR1(%a6) |load first word to temp storage - movel 4(%a0,%d0),FP_SCR1+4(%a6) |load second word - movel 8(%a0,%d0),FP_SCR1+8(%a6) |load third word - clrl %d0 |clear g,r,s - lea FP_SCR1(%a6),%a0 - btstb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) |convert to internal ext. format - - bsr round |go round the mantissa - - bfclr LOCAL_SGN(%a0){#0:#8} |convert back to IEEE ext format - beqs fin_fcr - bsetb #sign_bit,LOCAL_EX(%a0) -fin_fcr: - fmovemx (%a0),%fp0-%fp0 - rts - - |end diff --git a/arch/m68k/fpsp040/srem_mod.S b/arch/m68k/fpsp040/srem_mod.S deleted file mode 100644 index a27e70c9a0eb3608b21cbebc9c86770736697596..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/srem_mod.S +++ /dev/null @@ -1,421 +0,0 @@ -| -| srem_mod.sa 3.1 12/10/90 -| -| The entry point sMOD computes the floating point MOD of the -| input values X and Y. The entry point sREM computes the floating -| point (IEEE) REM of the input values X and Y. -| -| INPUT -| ----- -| Double-extended value Y is pointed to by address in register -| A0. Double-extended value X is located in -12(A0). The values -| of X and Y are both nonzero and finite; although either or both -| of them can be denormalized. The special cases of zeros, NaNs, -| and infinities are handled elsewhere. -| -| OUTPUT -| ------ -| FREM(X,Y) or FMOD(X,Y), depending on entry point. -| -| ALGORITHM -| --------- -| -| Step 1. Save and strip signs of X and Y: signX := sign(X), -| signY := sign(Y), X := |X|, Y := |Y|, -| signQ := signX EOR signY. Record whether MOD or REM -| is requested. -| -| Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. -| If (L < 0) then -| R := X, go to Step 4. -| else -| R := 2^(-L)X, j := L. -| endif -| -| Step 3. Perform MOD(X,Y) -| 3.1 If R = Y, go to Step 9. -| 3.2 If R > Y, then { R := R - Y, Q := Q + 1} -| 3.3 If j = 0, go to Step 4. -| 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to -| Step 3.1. -| -| Step 4. At this point, R = X - QY = MOD(X,Y). Set -| Last_Subtract := false (used in Step 7 below). If -| MOD is requested, go to Step 6. -| -| Step 5. R = MOD(X,Y), but REM(X,Y) is requested. -| 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to -| Step 6. -| 5.2 If R > Y/2, then { set Last_Subtract := true, -| Q := Q + 1, Y := signY*Y }. Go to Step 6. -| 5.3 This is the tricky case of R = Y/2. If Q is odd, -| then { Q := Q + 1, signX := -signX }. -| -| Step 6. R := signX*R. -| -| Step 7. If Last_Subtract = true, R := R - Y. -| -| Step 8. Return signQ, last 7 bits of Q, and R as required. -| -| Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, -| X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), -| R := 0. Return signQ, last 7 bits of Q, and R. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
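Steps 2 and 3 of the REM/MOD algorithm above amount to restoring division on the mantissas. A minimal C sketch follows, assuming finite positive inputs and using doubles where the assembly works on the raw 64-bit mantissas with an explicit carry bit; `fmod_sketch` and the `q` out-parameter are hypothetical names, per Step 8 the real code keeps only the low seven bits of Q, and the REM variant would add Step 5's comparison of R against Y/2:

```c
#include <math.h>

/* Sketch only: the shift-and-subtract loop of Steps 2-3. */
static double fmod_sketch(double x, double y, unsigned *q)
{
    int ex, ey;
    frexp(x, &ex);                   /* expo(X), frexp convention */
    frexp(y, &ey);
    int l = ex - ey;                 /* Step 2: L := expo(X)-expo(Y) */

    *q = 0;
    if (l < 0)
        return x;                    /* R := X, already reduced */

    double r = ldexp(x, -l);         /* R := 2**(-L) * X */
    for (int j = l; ; j--) {
        if (r >= y) {                /* Steps 3.1/3.2: R >= Y */
            r -= y;
            *q += 1;
        }
        if (j == 0)                  /* Step 3.3 */
            break;
        *q *= 2;                     /* Step 3.4: Q := 2Q, R := 2R */
        r *= 2.0;
    }
    return r;                        /* Step 4: R = X - QY = MOD(X,Y) */
}
```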
- -SREM_MOD: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .set Mod_Flag,L_SCR3 - .set SignY,FP_SCR3+4 - .set SignX,FP_SCR3+8 - .set SignQ,FP_SCR3+12 - .set Sc_Flag,FP_SCR4 - - .set Y,FP_SCR1 - .set Y_Hi,Y+4 - .set Y_Lo,Y+8 - - .set R,FP_SCR2 - .set R_Hi,R+4 - .set R_Lo,R+8 - - -Scale: .long 0x00010000,0x80000000,0x00000000,0x00000000 - - |xref t_avoid_unsupp - - .global smod -smod: - - movel #0,Mod_Flag(%a6) - bras Mod_Rem - - .global srem -srem: - - movel #1,Mod_Flag(%a6) - -Mod_Rem: -|..Save sign of X and Y - moveml %d2-%d7,-(%a7) | ...save data registers - movew (%a0),%d3 - movew %d3,SignY(%a6) - andil #0x00007FFF,%d3 | ...Y := |Y| - -| - movel 4(%a0),%d4 - movel 8(%a0),%d5 | ...(D3,D4,D5) is |Y| - - tstl %d3 - bnes Y_Normal - - movel #0x00003FFE,%d3 | ...$3FFD + 1 - tstl %d4 - bnes HiY_not0 - -HiY_0: - movel %d5,%d4 - clrl %d5 - subil #32,%d3 - clrl %d6 - bfffo %d4{#0:#32},%d6 - lsll %d6,%d4 - subl %d6,%d3 | ...(D3,D4,D5) is normalized -| ...with bias $7FFD - bras Chk_X - -HiY_not0: - clrl %d6 - bfffo %d4{#0:#32},%d6 - subl %d6,%d3 - lsll %d6,%d4 - movel %d5,%d7 | ...a copy of D5 - lsll %d6,%d5 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d4 | ...(D3,D4,D5) normalized -| ...with bias $7FFD - bras Chk_X - -Y_Normal: - addil #0x00003FFE,%d3 | ...(D3,D4,D5) normalized -| ...with bias $7FFD - -Chk_X: - movew -12(%a0),%d0 - movew %d0,SignX(%a6) - movew SignY(%a6),%d1 - eorl %d0,%d1 - andil #0x00008000,%d1 - movew %d1,SignQ(%a6) | ...sign(Q) obtained - andil #0x00007FFF,%d0 - movel -8(%a0),%d1 - movel -4(%a0),%d2 | ...(D0,D1,D2) is |X| - tstl %d0 - bnes X_Normal - movel #0x00003FFE,%d0 - tstl %d1 - bnes HiX_not0 - -HiX_0: - movel %d2,%d1 - clrl %d2 - subil #32,%d0 - clrl %d6 - bfffo %d1{#0:#32},%d6 - lsll %d6,%d1 - subl %d6,%d0 | ...(D0,D1,D2) is normalized -| ...with bias $7FFD - bras Init - -HiX_not0: - clrl %d6 - bfffo %d1{#0:#32},%d6 - subl %d6,%d0 - lsll %d6,%d1 - movel %d2,%d7 | ...a copy of D2 - lsll %d6,%d2 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d1 | ...(D0,D1,D2) normalized -| ...with bias $7FFD - bras Init - -X_Normal: - addil #0x00003FFE,%d0 | ...(D0,D1,D2) normalized -| ...with bias $7FFD - -Init: -| - movel %d3,L_SCR1(%a6) | ...save biased expo(Y) - movel %d0,L_SCR2(%a6) |save d0 - subl %d3,%d0 | ...L := expo(X)-expo(Y) -| Move.L D0,L ...D0 is j - clrl %d6 | ...D6 := carry <- 0 - clrl %d3 | ...D3 is Q - moveal #0,%a1 | ...A1 is k; j+k=L, Q=0 - -|..(Carry,D1,D2) is R - tstl %d0 - bges Mod_Loop - -|..expo(X) < expo(Y). Thus X = mod(X,Y) -| - movel L_SCR2(%a6),%d0 |restore d0 - bra Get_Mod - -|..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L - - -Mod_Loop: - tstl %d6 | ...test carry bit - bgts R_GT_Y - -|..At this point carry = 0, R = (D1,D2), Y = (D4,D5) - cmpl %d4,%d1 | ...compare hi(R) and hi(Y) - bnes R_NE_Y - cmpl %d5,%d2 | ...compare lo(R) and lo(Y) - bnes R_NE_Y - -|..At this point, R = Y - bra Rem_is_0 - -R_NE_Y: -|..use the borrow of the previous compare - bcss R_LT_Y | ...borrow is set iff R < Y - -R_GT_Y: -|..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 -|..and Y < (D1,D2) < 2Y. Either way, perform R - Y - subl %d5,%d2 | ...lo(R) - lo(Y) - subxl %d4,%d1 | ...hi(R) - hi(Y) - clrl %d6 | ...clear carry - addql #1,%d3 | ...Q := Q + 1 - -R_LT_Y: -|..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. - tstl %d0 | ...see if j = 0. 
- beqs PostLoop - - addl %d3,%d3 | ...Q := 2Q - addl %d2,%d2 | ...lo(R) = 2lo(R) - roxll #1,%d1 | ...hi(R) = 2hi(R) + carry - scs %d6 | ...set Carry if 2(R) overflows - addql #1,%a1 | ...k := k+1 - subql #1,%d0 | ...j := j - 1 -|..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. - - bras Mod_Loop - -PostLoop: -|..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. - -|..normalize R. - movel L_SCR1(%a6),%d0 | ...new biased expo of R - tstl %d1 - bnes HiR_not0 - -HiR_0: - movel %d2,%d1 - clrl %d2 - subil #32,%d0 - clrl %d6 - bfffo %d1{#0:#32},%d6 - lsll %d6,%d1 - subl %d6,%d0 | ...(D0,D1,D2) is normalized -| ...with bias $7FFD - bras Get_Mod - -HiR_not0: - clrl %d6 - bfffo %d1{#0:#32},%d6 - bmis Get_Mod | ...already normalized - subl %d6,%d0 - lsll %d6,%d1 - movel %d2,%d7 | ...a copy of D2 - lsll %d6,%d2 - negl %d6 - addil #32,%d6 - lsrl %d6,%d7 - orl %d7,%d1 | ...(D0,D1,D2) normalized - -| -Get_Mod: - cmpil #0x000041FE,%d0 - bges No_Scale -Do_Scale: - movew %d0,R(%a6) - clrw R+2(%a6) - movel %d1,R_Hi(%a6) - movel %d2,R_Lo(%a6) - movel L_SCR1(%a6),%d6 - movew %d6,Y(%a6) - clrw Y+2(%a6) - movel %d4,Y_Hi(%a6) - movel %d5,Y_Lo(%a6) - fmovex R(%a6),%fp0 | ...no exception - movel #1,Sc_Flag(%a6) - bras ModOrRem -No_Scale: - movel %d1,R_Hi(%a6) - movel %d2,R_Lo(%a6) - subil #0x3FFE,%d0 - movew %d0,R(%a6) - clrw R+2(%a6) - movel L_SCR1(%a6),%d6 - subil #0x3FFE,%d6 - movel %d6,L_SCR1(%a6) - fmovex R(%a6),%fp0 - movew %d6,Y(%a6) - movel %d4,Y_Hi(%a6) - movel %d5,Y_Lo(%a6) - movel #0,Sc_Flag(%a6) - -| - - -ModOrRem: - movel Mod_Flag(%a6),%d6 - beqs Fix_Sign - - movel L_SCR1(%a6),%d6 | ...new biased expo(Y) - subql #1,%d6 | ...biased expo(Y/2) - cmpl %d6,%d0 - blts Fix_Sign - bgts Last_Sub - - cmpl %d4,%d1 - bnes Not_EQ - cmpl %d5,%d2 - bnes Not_EQ - bra Tie_Case - -Not_EQ: - bcss Fix_Sign - -Last_Sub: -| - fsubx Y(%a6),%fp0 | ...no exceptions - addql #1,%d3 | ...Q := Q + 1 - -| - -Fix_Sign: -|..Get sign of X - movew SignX(%a6),%d6 - bges Get_Q - fnegx %fp0 - -|..Get Q -| -Get_Q: - clrl %d6 - movew SignQ(%a6),%d6 | ...D6 is sign(Q) - movel #8,%d7 - lsrl %d7,%d6 - andil #0x0000007F,%d3 | ...7 bits of Q - orl %d6,%d3 | ...sign and bits of Q - swap %d3 - fmovel %fpsr,%d6 - andil #0xFF00FFFF,%d6 - orl %d3,%d6 - fmovel %d6,%fpsr | ...put Q in fpsr - -| -Restore: - moveml (%a7)+,%d2-%d7 - fmovel USER_FPCR(%a6),%fpcr - movel Sc_Flag(%a6),%d0 - beqs Finish - fmulx Scale(%pc),%fp0 | ...may cause underflow - bra t_avoid_unsupp |check for denorm as a -| ;result of the scaling - -Finish: - fmovex %fp0,%fp0 |capture exceptions & round - rts - -Rem_is_0: -|..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) - addql #1,%d3 - cmpil #8,%d0 | ...D0 is j - bges Q_Big - - lsll %d0,%d3 - bras Set_R_0 - -Q_Big: - clrl %d3 - -Set_R_0: - fmoves #0x00000000,%fp0 - movel #0,Sc_Flag(%a6) - bra Fix_Sign - -Tie_Case: -|..Check parity of Q - movel %d3,%d6 - andil #0x00000001,%d6 - tstl %d6 - beq Fix_Sign | ...Q is even - -|..Q is odd, Q := Q + 1, signX := -signX - addql #1,%d3 - movew SignX(%a6),%d6 - eoril #0x00008000,%d6 - movew %d6,SignX(%a6) - bra Fix_Sign - - |end diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S deleted file mode 100644 index a1ef8e01bf06703bd732abcd89053e7cec214226..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/ssin.S +++ /dev/null @@ -1,745 +0,0 @@ -| -| ssin.sa 3.3 7/29/91 -| -| The entry point sSIN computes the sine of an input argument -| sCOS computes the cosine, and sSINCOS computes both. 
The -| corresponding entry points with a "d" computes the same -| corresponding function values for denormalized inputs. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or -| COS is requested. Otherwise, for SINCOS, sin(X) is returned -| in Fp0, and cos(X) is returned in Fp1. -| -| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. -| -| Accuracy and Monotonicity: The returned result is within 1 ulp in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The programs sSIN and sCOS take approximately 150 cycles for -| input argument X such that |X| < 15Pi, which is the usual -| situation. The speed for sSINCOS is approximately 190 cycles. -| -| Algorithm: -| -| SIN and COS: -| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. -| -| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. -| -| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let -| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite -| k by k := k + AdjN. -| -| 4. If k is even, go to 6. -| -| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) -| where cos(r) is approximated by an even polynomial in r, -| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. -| Exit. -| -| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) -| where sin(r) is approximated by an odd polynomial in r -| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. -| Exit. -| -| 7. If |X| > 1, go to 9. -| -| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. -| -| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. -| -| SINCOS: -| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. -| -| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let -| k = N mod 4, so in particular, k = 0,1,2,or 3. -| -| 3. If k is even, go to 5. -| -| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. -| j1 exclusive or with the l.s.b. of k. -| sgn1 := (-1)**j1, sgn2 := (-1)**j2. -| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where -| sin(r) and cos(r) are computed as odd and even polynomials -| in r, respectively. Exit -| -| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. -| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where -| sin(r) and cos(r) are computed as odd and even polynomials -| in r, respectively. Exit -| -| 6. If |X| > 1, go to 8. -| -| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. -| -| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
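For orientation, here is a C sketch of the SIN/COS steps above. `ssin_sketch` is a hypothetical name; libm's sin()/cos() stand in for the fixed odd/even polynomials, and the one-line reduction stands in for the PITBL lookup, which the assembly does in two pieces (Y1, Y2) precisely because a single multiply-and-subtract would lose bits:

```c
#include <math.h>

/* Sketch only: argument reduction and quadrant selection. */
static double ssin_sketch(double x, int adjn)   /* adjn: 0 = SIN, 1 = COS */
{
    if (fabs(x) < 0x1p-40)                  /* Step 8: tiny argument */
        return adjn ? 1.0 : x;

    double n = nearbyint(x * M_2_PI);       /* Step 3: X = N(pi/2) + r */
    double r = x - n * M_PI_2;              /* |r| <= pi/4, one piece here */

    switch (((int)n + adjn) & 3) {          /* k := (N mod 4) + AdjN */
    case 0: return  sin(r);                 /* Step 6, j = 0, sgn = +1 */
    case 1: return  cos(r);                 /* Step 5, j = 0, sgn = +1 */
    case 2: return -sin(r);                 /* Step 6, j = 1, sgn = -1 */
    case 3: return -cos(r);                 /* Step 5, j = 1, sgn = -1 */
    }
    return 0.0;                             /* unreachable */
}
```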
- -|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FD78000,0x4004BC7E -TWOBYPI: .long 0x3FE45F30,0x6DC9C883 - -SINA7: .long 0xBD6AAA77,0xCCC994F5 -SINA6: .long 0x3DE61209,0x7AAE8DA1 - -SINA5: .long 0xBE5AE645,0x2A118AE4 -SINA4: .long 0x3EC71DE3,0xA5341531 - -SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 - -SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000 - -SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 - -COSB8: .long 0x3D2AC4D0,0xD6011EE3 -COSB7: .long 0xBDA9396F,0x9F45AC19 - -COSB6: .long 0x3E21EED9,0x0612C972 -COSB5: .long 0xBE927E4F,0xB79D9FCF - -COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 - -COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 - -COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E -COSB1: .long 0xBF000000 - -INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A - -TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - - |xref PITBL - - .set INARG,FP_SCR4 - - .set X,FP_SCR5 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set RPRIME,FP_SCR1 - .set SPRIME,FP_SCR2 - - .set POSNEG1,L_SCR1 - .set TWOTO63,L_SCR1 - - .set ENDFLAG,L_SCR2 - .set N,L_SCR2 - - .set ADJN,L_SCR3 - - | xref t_frcinx - |xref t_extdnrm - |xref sto_cos - - .global ssind -ssind: -|--SIN(X) = X FOR DENORMALIZED X - bra t_extdnrm - - .global scosd -scosd: -|--COS(X) = 1 FOR DENORMALIZED X - - fmoves #0x3F800000,%fp0 -| -| 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits -| - fmovel #0,%fpsr -| - bra t_frcinx - - .global ssin -ssin: -|--SET ADJN TO 0 - movel #0,ADJN(%a6) - bras SINBGN - - .global scos -scos: -|--SET ADJN TO 1 - movel #1,ADJN(%a6) - -SINBGN: -|--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges SOK1 - bra SINSM - -SOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts SINMAIN - bra REDUCEX - -SINMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT THREE INSTRUCTIONS - lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - - -|--FP1 IS NOW READY - fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER - - movel N(%a6),%d0 - asll #4,%d0 - addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2 -| ...WHICH IS IN TWO PIECES Y1 & Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 -|--HIDE THE NEXT ONE - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - -SINCONT: -|--continuation from REDUCEX - -|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED - movel N(%a6),%d0 - addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN - rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE - cmpil #0,%d0 - blt COSPOLY - -SINPOLY: -|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY -|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE -|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS -|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) -|--WHERE T=S*S. -|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION -|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. 
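Before the code that implements it, the two-chain split described in the comment block above can be written out in C (a sketch; the `a[]` array flattens the mixed double/double-extended coefficient storage to plain doubles):

```c
/* Sketch of the SINPOLY split: with s = r*r and t = s*s, the odd- and
 * even-indexed coefficients form two independent Horner chains that a
 * pipelined FPU can interleave, as the alternating fmul/fadd pairs
 * below do.  a[0..6] hold A1..A7. */
static double sin_poly_sketch(double r, const double a[7])
{
    double s = r * r, t = s * s;
    double p = a[0] + t * (a[2] + t * (a[4] + t * a[6])); /* A1+T(A3+T(A5+TA7)) */
    double q = s * (a[1] + t * (a[3] + t * a[5]));        /* S(A2+T(A4+TA6)) */
    return r + r * s * (p + q);
}
```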
- fmovex %fp0,X(%a6) | ...X IS R - fmulx %fp0,%fp0 | ...FP0 IS S -|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 - fmoved SINA7,%fp3 - fmoved SINA6,%fp2 -|--FP0 IS NOW READY - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS T -|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 - - rorl #1,%d0 - andil #0x80000000,%d0 -| ...LEAST SIG. BIT OF D0 IN SIGN POSITION - eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R - - fmulx %fp1,%fp3 | ...TA7 - fmulx %fp1,%fp2 | ...TA6 - - faddd SINA5,%fp3 | ...A5+TA7 - faddd SINA4,%fp2 | ...A4+TA6 - - fmulx %fp1,%fp3 | ...T(A5+TA7) - fmulx %fp1,%fp2 | ...T(A4+TA6) - - faddd SINA3,%fp3 | ...A3+T(A5+TA7) - faddx SINA2,%fp2 | ...A2+T(A4+TA6) - - fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7)) - - fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6)) - faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7)) - fmulx X(%a6),%fp0 | ...R'*S - - faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] -|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING -|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING - - - fmulx %fp1,%fp0 | ...SIN(R')-R' -|--FP1 RELEASED. - - fmovel %d1,%FPCR |restore users exceptions - faddx X(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -COSPOLY: -|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY -|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE -|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS -|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) -|--WHERE T=S*S. -|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION -|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 -|--AND IS THEREFORE STORED AS SINGLE PRECISION. - - fmulx %fp0,%fp0 | ...FP0 IS S -|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 - fmoved COSB8,%fp2 - fmoved COSB7,%fp3 -|--FP0 IS NOW READY - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS T -|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 - fmovex %fp0,X(%a6) | ...X IS S - rorl #1,%d0 - andil #0x80000000,%d0 -| ...LEAST SIG. BIT OF D0 IN SIGN POSITION - - fmulx %fp1,%fp2 | ...TB8 -|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU - eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S - andil #0x80000000,%d0 - - fmulx %fp1,%fp3 | ...TB7 -|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU - oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE - movel %d0,POSNEG1(%a6) - - faddd COSB6,%fp2 | ...B6+TB8 - faddd COSB5,%fp3 | ...B5+TB7 - - fmulx %fp1,%fp2 | ...T(B6+TB8) - fmulx %fp1,%fp3 | ...T(B5+TB7) - - faddd COSB4,%fp2 | ...B4+T(B6+TB8) - faddx COSB3,%fp3 | ...B3+T(B5+TB7) - - fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8)) - fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7)) - - faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8)) - fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7)) - - fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8))) -|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING -|--FP2 RELEASED. - - - faddx %fp1,%fp0 -|--FP1 RELEASED - - fmulx X(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - fadds POSNEG1(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -SINBORS: -|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -|--IF |X| < 2**(-40), RETURN X OR 1. 
- cmpil #0x3FFF8000,%d0 - bgts REDUCEX - - -SINSM: - movel ADJN(%a6),%d0 - cmpil #0,%d0 - bgts COSTINY - -SINTINY: - movew #0x0000,XDCARE(%a6) | ...JUST IN CASE - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - bra t_frcinx - - -COSTINY: - fmoves #0x3F800000,%fp0 - - fmovel %d1,%FPCR |restore users exceptions - fsubs #0x00800000,%fp0 |last inst - possible exception set - bra t_frcinx - - -REDUCEX: -|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. -|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING -|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. - - fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 - movel %d2,-(%a7) - fmoves #0x00000000,%fp1 -|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that -|--there is a danger of unwanted overflow in first LOOP iteration. In this -|--case, reduce argument by one remainder step to make subsequent reduction -|--safe. - cmpil #0x7ffeffff,%d0 |is argument dangerously large? - bnes LOOP - movel #0x7ffe0000,FP_SCR2(%a6) |yes -| ;create 2**16383*PI/2 - movel #0xc90fdaa2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) - ftstx %fp0 |test sign of argument - movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* -| ;PI/2 at FP_SCR3 - movel #0x85a308d3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) - fblt red_neg - orw #0x8000,FP_SCR2(%a6) |positive arg - orw #0x8000,FP_SCR3(%a6) -red_neg: - faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact - fmovex %fp0,%fp1 |save high result in fp1 - faddx FP_SCR3(%a6),%fp0 |low part of reduction - fsubx %fp0,%fp1 |determine low component of result - faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. - -|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. -|--integer quotient will be stored in N -|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) - -LOOP: - fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 - movew INARG(%a6),%d0 - movel %d0,%a1 | ...save a copy of D0 - andil #0x00007FFF,%d0 - subil #0x00003FFF,%d0 | ...D0 IS K - cmpil #28,%d0 - bles LASTLOOP -CONTLOOP: - subil #27,%d0 | ...D0 IS L := K-27 - movel #0,ENDFLAG(%a6) - bras WORK -LASTLOOP: - clrl %d0 | ...D0 IS L := 0 - movel #1,ENDFLAG(%a6) - -WORK: -|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN -|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. - -|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), -|--2**L * (PIby2_1), 2**L * (PIby2_2) - - movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI - subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) - - movel #0xA2F9836E,FP_SCR1+4(%a6) - movel #0x4E44152A,FP_SCR1+8(%a6) - movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) - - fmovex %fp0,%fp2 - fmulx FP_SCR1(%a6),%fp2 -|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN -|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N -|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT -|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE -|--US THE DESIRED VALUE IN FLOATING POINT. 
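The add/subtract trick in the comment above, restated as a C sketch. The constant is 2**52 here because a C double carries a 53-bit mantissa; the assembly adds sign(INARG)*2**63 against the extended format's 64-bit mantissa, and the choice of L keeps the quotient below 2**29 so the addition cannot overflow:

```c
#include <math.h>

/* Sketch: in round-to-nearest, adding a constant so large that the
 * fraction bits fall off the end forces rounding to an integer;
 * subtracting it back leaves that integer as a float, with no
 * int<->float conversion.  Valid for |x| < 2**51 or so. */
static double round_to_int_sketch(double x)
{
    volatile double big = copysign(0x1p52, x); /* volatile: keep both ops */
    return (x + big) - big;
}
```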
- -|--HIDE SIX CYCLES OF INSTRUCTION - movel %a1,%d2 - swap %d2 - andil #0x80000000,%d2 - oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL - movel %d2,TWOTO63(%a6) - - movel %d0,%d2 - addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) - -|--FP2 IS READY - fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED - -|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - movew %d2,FP_SCR2(%a6) - clrw FP_SCR2+2(%a6) - movel #0xC90FDAA2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 - -|--FP2 IS READY - fsubs TWOTO63(%a6),%fp2 | ...FP2 is N - - addil #0x00003FDD,%d0 - movew %d0,FP_SCR3(%a6) - clrw FP_SCR3+2(%a6) - movel #0x85A308D3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 - - movel ENDFLAG(%a6),%d0 - -|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -|--P2 = 2**(L) * Piby2_2 - fmovex %fp2,%fp4 - fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 - fmovex %fp2,%fp5 - fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 - fmovex %fp4,%fp3 -|--we want P+p = W+w but |p| <= half ulp of P -|--Then, we need to compute A := R-P and a := r-p - faddx %fp5,%fp3 | ...FP3 is P - fsubx %fp3,%fp4 | ...W-P - - fsubx %fp3,%fp0 | ...FP0 is A := R - P - faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w - - fmovex %fp0,%fp3 | ...FP3 A - fsubx %fp4,%fp1 | ...FP1 is a := r - p - -|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -|--|r| <= half ulp of R. - faddx %fp1,%fp0 | ...FP0 is R := A+a -|--No need to calculate r if this is the last loop - cmpil #0,%d0 - bgt RESTORE - -|--Need to calculate r - fsubx %fp0,%fp3 | ...A-R - faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a - bra LOOP - -RESTORE: - fmovel %fp2,N(%a6) - movel (%a7)+,%d2 - fmovemx (%a7)+,%fp2-%fp5 - - - movel ADJN(%a6),%d0 - cmpil #4,%d0 - - blt SINCONT - bras SCCONT - - .global ssincosd -ssincosd: -|--SIN AND COS OF X FOR DENORMALIZED X - - fmoves #0x3F800000,%fp1 - bsr sto_cos |store cosine result - bra t_extdnrm - - .global ssincos -ssincos: -|--SET ADJN TO 4 - movel #4,ADJN(%a6) - - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges SCOK1 - bra SCSM - -SCOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts SCMAIN - bra REDUCEX - - -SCMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT THREE INSTRUCTIONS - lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - - -|--FP1 IS NOW READY - fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER - - movel N(%a6),%d0 - asll #4,%d0 - addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - -SCCONT: -|--continuation point from REDUCEX - -|--HIDE THE NEXT TWO - movel N(%a6),%d0 - rorl #1,%d0 - - cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD - bge NEVEN - -NODD: -|--REGISTERS SAVED SO FAR: D0, A0, FP2. 
- - fmovex %fp0,RPRIME(%a6) - fmulx %fp0,%fp0 | ...FP0 IS S = R*R - fmoved SINA7,%fp1 | ...A7 - fmoved COSB8,%fp2 | ...B8 - fmulx %fp0,%fp1 | ...SA7 - movel %d2,-(%a7) - movel %d0,%d2 - fmulx %fp0,%fp2 | ...SB8 - rorl #1,%d2 - andil #0x80000000,%d2 - - faddd SINA6,%fp1 | ...A6+SA7 - eorl %d0,%d2 - andil #0x80000000,%d2 - faddd COSB7,%fp2 | ...B7+SB8 - - fmulx %fp0,%fp1 | ...S(A6+SA7) - eorl %d2,RPRIME(%a6) - movel (%a7)+,%d2 - fmulx %fp0,%fp2 | ...S(B7+SB8) - rorl #1,%d0 - andil #0x80000000,%d0 - - faddd SINA5,%fp1 | ...A5+S(A6+SA7) - movel #0x3F800000,POSNEG1(%a6) - eorl %d0,POSNEG1(%a6) - faddd COSB6,%fp2 | ...B6+S(B7+SB8) - - fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7)) - fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8)) - fmovex %fp0,SPRIME(%a6) - - faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7)) - eorl %d0,SPRIME(%a6) - faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8)) - - fmulx %fp0,%fp1 | ...S(A4+...) - fmulx %fp0,%fp2 | ...S(B5+...) - - faddd SINA3,%fp1 | ...A3+S(A4+...) - faddd COSB4,%fp2 | ...B4+S(B5+...) - - fmulx %fp0,%fp1 | ...S(A3+...) - fmulx %fp0,%fp2 | ...S(B4+...) - - faddx SINA2,%fp1 | ...A2+S(A3+...) - faddx COSB3,%fp2 | ...B3+S(B4+...) - - fmulx %fp0,%fp1 | ...S(A2+...) - fmulx %fp0,%fp2 | ...S(B3+...) - - faddx SINA1,%fp1 | ...A1+S(A2+...) - faddx COSB2,%fp2 | ...B2+S(B3+...) - - fmulx %fp0,%fp1 | ...S(A1+...) - fmulx %fp2,%fp0 | ...S(B2+...) - - - - fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...) - fadds COSB1,%fp0 | ...B1+S(B2...) - fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...)) - - movel %d1,-(%sp) |restore users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - faddx RPRIME(%a6),%fp1 | ...COS(X) - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - fadds POSNEG1(%a6),%fp0 | ...SIN(X) - - bra t_frcinx - - -NEVEN: -|--REGISTERS SAVED SO FAR: FP2. - - fmovex %fp0,RPRIME(%a6) - fmulx %fp0,%fp0 | ...FP0 IS S = R*R - fmoved COSB8,%fp1 | ...B8 - fmoved SINA7,%fp2 | ...A7 - fmulx %fp0,%fp1 | ...SB8 - fmovex %fp0,SPRIME(%a6) - fmulx %fp0,%fp2 | ...SA7 - rorl #1,%d0 - andil #0x80000000,%d0 - faddd COSB7,%fp1 | ...B7+SB8 - faddd SINA6,%fp2 | ...A6+SA7 - eorl %d0,RPRIME(%a6) - eorl %d0,SPRIME(%a6) - fmulx %fp0,%fp1 | ...S(B7+SB8) - oril #0x3F800000,%d0 - movel %d0,POSNEG1(%a6) - fmulx %fp0,%fp2 | ...S(A6+SA7) - - faddd COSB6,%fp1 | ...B6+S(B7+SB8) - faddd SINA5,%fp2 | ...A5+S(A6+SA7) - - fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8)) - fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7)) - - faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8)) - faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7)) - - fmulx %fp0,%fp1 | ...S(B5+...) - fmulx %fp0,%fp2 | ...S(A4+...) - - faddd COSB4,%fp1 | ...B4+S(B5+...) - faddd SINA3,%fp2 | ...A3+S(A4+...) - - fmulx %fp0,%fp1 | ...S(B4+...) - fmulx %fp0,%fp2 | ...S(A3+...) - - faddx COSB3,%fp1 | ...B3+S(B4+...) - faddx SINA2,%fp2 | ...A2+S(A3+...) - - fmulx %fp0,%fp1 | ...S(B3+...) - fmulx %fp0,%fp2 | ...S(A2+...) - - faddx COSB2,%fp1 | ...B2+S(B3+...) - faddx SINA1,%fp2 | ...A1+S(A2+...) - - fmulx %fp0,%fp1 | ...S(B2+...) - fmulx %fp2,%fp0 | ...s(a1+...) - - - - fadds COSB1,%fp1 | ...B1+S(B2...) - fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...) 
- fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...)) - - movel %d1,-(%sp) |save users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - fadds POSNEG1(%a6),%fp1 | ...COS(X) - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - faddx RPRIME(%a6),%fp0 | ...SIN(X) - - bra t_frcinx - -SCBORS: - cmpil #0x3FFF8000,%d0 - bgt REDUCEX - - -SCSM: - movew #0x0000,XDCARE(%a6) - fmoves #0x3F800000,%fp1 - - movel %d1,-(%sp) |save users mode & precision - andil #0xff,%d1 |mask off all exceptions - fmovel %d1,%FPCR - fsubs #0x00800000,%fp1 - bsr sto_cos |store cosine result - fmovel (%sp)+,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/ssinh.S b/arch/m68k/fpsp040/ssinh.S deleted file mode 100644 index 8a560edc7653e903580c74490b0f4d69a2f43f2a..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/ssinh.S +++ /dev/null @@ -1,134 +0,0 @@ -| -| ssinh.sa 3.1 12/10/90 -| -| The entry point sSinh computes the hyperbolic sine of -| an input argument; sSinhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value sinh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program sSINH takes approximately 280 cycles. -| -| Algorithm: -| -| SINH -| 1. If |X| > 16380 log2, go to 3. -| -| 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae -| y = |X|, sgn = sign(X), and z = expm1(Y), -| sinh(X) = sgn*(1/2)*( z + z/(1+z) ). -| Exit. -| -| 3. If |X| > 16480 log2, go to 5. -| -| 4. (16380 log2 < |X| <= 16480 log2) -| sinh(X) = sign(X) * exp(|X|)/2. -| However, invoking exp(|X|) may cause premature overflow. -| Thus, we calculate sinh(X) as follows: -| Y := |X| -| sgn := sign(X) -| sgnFact := sgn * 2**(16380) -| Y' := Y - 16381 log2 -| sinh(X) := sgnFact * exp(Y'). -| Exit. -| -| 5. (|X| > 16480 log2) sinh(X) must overflow. Return -| sign(X)*Huge*Huge to generate overflow and an infinity with -| the appropriate sign. Huge is the largest finite number in -| extended format. Exit. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|SSINH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -T1: .long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD -T2: .long 0x3D6F90AE,0xB1E75CC7 | ... 
16381 LOG2 TRAIL
-
-	|xref t_frcinx
-	|xref t_ovfl
-	|xref t_extdnrm
-	|xref setox
-	|xref setoxm1
-
-	.global ssinhd
-ssinhd:
-|--SINH(X) = X FOR DENORMALIZED X
-
-	bra t_extdnrm
-
-	.global ssinh
-ssinh:
-	fmovex (%a0),%fp0 | ...LOAD INPUT
-
-	movel (%a0),%d0
-	movew 4(%a0),%d0
-	movel %d0,%a1 | save a copy of original (compacted) operand
-	andl #0x7FFFFFFF,%d0
-	cmpl #0x400CB167,%d0
-	bgts SINHBIG
-
-|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
-|--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
-
-	fabsx %fp0 | ...Y = |X|
-
-	moveml %a1/%d1,-(%sp)
-	fmovemx %fp0-%fp0,(%a0)
-	clrl %d1
-	bsr setoxm1 | ...FP0 IS Z = EXPM1(Y)
-	fmovel #0,%fpcr
-	moveml (%sp)+,%a1/%d1
-
-	fmovex %fp0,%fp1
-	fadds #0x3F800000,%fp1 | ...1+Z
-	fmovex %fp0,-(%sp)
-	fdivx %fp1,%fp0 | ...Z/(1+Z)
-	movel %a1,%d0
-	andl #0x80000000,%d0
-	orl #0x3F000000,%d0
-	faddx (%sp)+,%fp0
-	movel %d0,-(%sp)
-
-	fmovel %d1,%fpcr
-	fmuls (%sp)+,%fp0 |last fp inst - possible exceptions set
-
-	bra t_frcinx
-
-SINHBIG:
-	cmpl #0x400CB2B3,%d0
-	bgt t_ovfl
-	fabsx %fp0
-	fsubd T1(%pc),%fp0 | ...(|X|-16381LOG2_LEAD)
-	movel #0,-(%sp)
-	movel #0x80000000,-(%sp)
-	movel %a1,%d0
-	andl #0x80000000,%d0
-	orl #0x7FFB0000,%d0
-	movel %d0,-(%sp) | ...EXTENDED FMT
-	fsubd T2(%pc),%fp0 | ...|X| - 16381 LOG2, ACCURATE
-
-	movel %d1,-(%sp)
-	clrl %d1
-	fmovemx %fp0-%fp0,(%a0)
-	bsr setox
-	fmovel (%sp)+,%fpcr
-
-	fmulx (%sp)+,%fp0 |possible exception
-	bra t_frcinx
-
-	|end
diff --git a/arch/m68k/fpsp040/stan.S b/arch/m68k/fpsp040/stan.S
deleted file mode 100644
index f8553aaececbc64d76be7908e57b43869089558e..0000000000000000000000000000000000000000
--- a/arch/m68k/fpsp040/stan.S
+++ /dev/null
@@ -1,454 +0,0 @@
-|
-|	stan.sa 3.3 7/29/91
-|
-|	The entry point stan computes the tangent of
-|	an input argument;
-|	stand does the same except for denormalized input.
-|
-|	Input: Double-extended number X in location pointed to
-|		by address register a0.
-|
-|	Output: The value tan(X) returned in floating-point register Fp0.
-|
-|	Accuracy and Monotonicity: The returned result is within 3 ulp in
-|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
-|		result is subsequently rounded to double precision. The
-|		result is provably monotonic in double precision.
-|
-|	Speed: The program sTAN takes approximately 170 cycles for
-|		input argument X such that |X| < 15Pi, which is the usual
-|		situation.
-|
-|	Algorithm:
-|
-|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
-|
-|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
-|		k = N mod 2, so in particular, k = 0 or 1.
-|
-|	3. If k is odd, go to 5.
-|
-|	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
-|		rational function U/V where
-|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
-|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
-|		Exit.
-|
-|	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
-|		rational function U/V where
-|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
-|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
-|		-Cot(r) = -V/U. Exit.
-|
-|	6. If |X| > 1, go to 8.
-|
-|	7. (|X|<2**(-40)) Tan(X) = X. Exit.
-|
-|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
-|
-
-|	Copyright (C) Motorola, Inc. 1990
-|	All Rights Reserved
-|
-|	For details on the license for this file, please see the
-|	file, README, in this same directory.
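A short C sketch of the selection in the TAN algorithm above; `stan_sketch` is a hypothetical name, libm's tan() stands in for the rational U/V, and the one-piece reduction stands in for the two-piece PITBL lookup the assembly performs:

```c
#include <math.h>

/* Sketch only: Steps 2-5 and 7 of the TAN algorithm. */
static double stan_sketch(double x)
{
    if (fabs(x) < 0x1p-40)              /* Step 7: tan(X) ~ X */
        return x;

    double n = nearbyint(x * M_2_PI);   /* Step 2: X = N(pi/2) + r */
    double r = x - n * M_PI_2;          /* |r| <= pi/4 */
    double t = tan(r);                  /* U/V in the real code */

    return ((int)n & 1) ? -1.0 / t : t; /* Step 4, or Step 5: -cot(r) = -V/U */
}
```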
- -|STAN idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FD78000,0x4004BC7E -TWOBYPI: .long 0x3FE45F30,0x6DC9C883 - -TANQ4: .long 0x3EA0B759,0xF50F8688 -TANP3: .long 0xBEF2BAA5,0xA8924F04 - -TANQ3: .long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 - -TANP2: .long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 - -TANQ2: .long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 - -TANP1: .long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 - -TANQ1: .long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 - -INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 - -TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - -|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING -|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT -|--MOST 69 BITS LONG. - .global PITBL -PITBL: - .long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 - .long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 - .long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 - .long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 - .long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 - .long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 - .long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 - .long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 - .long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 - .long 0xC0040000,0x90836524,0x88034B96,0x20B00000 - .long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 - .long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 - .long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 - .long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 - .long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 - .long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 - .long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 - .long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 - .long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 - .long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 - .long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 - .long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 - .long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 - .long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 - .long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 - .long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 - .long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 - .long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 - .long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 - .long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 - .long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 - .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 - .long 0x00000000,0x00000000,0x00000000,0x00000000 - .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 - .long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 - .long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 - .long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 - .long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 - .long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 - .long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 - .long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 - .long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 - .long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 - .long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 - .long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 - .long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 - .long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 - .long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 - .long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 - .long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 - .long 
0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 - .long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 - .long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 - .long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 - .long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 - .long 0x40040000,0x90836524,0x88034B96,0xA0B00000 - .long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 - .long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 - .long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 - .long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 - .long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 - .long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 - .long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 - .long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 - .long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 - - .set INARG,FP_SCR4 - - .set TWOTO63,L_SCR1 - .set ENDFLAG,L_SCR2 - .set N,L_SCR3 - - | xref t_frcinx - |xref t_extdnrm - - .global stand -stand: -|--TAN(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global stan -stan: - fmovex (%a0),%fp0 | ...LOAD INPUT - - movel (%a0),%d0 - movew 4(%a0),%d0 - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? - bges TANOK1 - bra TANSM -TANOK1: - cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? - blts TANMAIN - bra REDUCEX - - -TANMAIN: -|--THIS IS THE USUAL CASE, |X| <= 15 PI. -|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmovex %fp0,%fp1 - fmuld TWOBYPI,%fp1 | ...X*2/PI - -|--HIDE THE NEXT TWO INSTRUCTIONS - leal PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 - -|--FP1 IS NOW READY - fmovel %fp1,%d0 | ...CONVERT TO INTEGER - - asll #4,%d0 - addal %d0,%a1 | ...ADDRESS N*PIBY2 IN Y1, Y2 - - fsubx (%a1)+,%fp0 | ...X-Y1 -|--HIDE THE NEXT ONE - - fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 - - rorl #5,%d0 - andil #0x80000000,%d0 | ...D0 WAS ODD IFF D0 < 0 - -TANCONT: - - cmpil #0,%d0 - blt NODD - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...S = R*R - - fmoved TANQ4,%fp3 - fmoved TANP3,%fp2 - - fmulx %fp1,%fp3 | ...SQ4 - fmulx %fp1,%fp2 | ...SP3 - - faddd TANQ3,%fp3 | ...Q3+SQ4 - faddx TANP2,%fp2 | ...P2+SP3 - - fmulx %fp1,%fp3 | ...S(Q3+SQ4) - fmulx %fp1,%fp2 | ...S(P2+SP3) - - faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) - faddx TANP1,%fp2 | ...P1+S(P2+SP3) - - fmulx %fp1,%fp3 | ...S(Q2+S(Q3+SQ4)) - fmulx %fp1,%fp2 | ...S(P1+S(P2+SP3)) - - faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) - fmulx %fp0,%fp2 | ...RS(P1+S(P2+SP3)) - - fmulx %fp3,%fp1 | ...S(Q1+S(Q2+S(Q3+SQ4))) - - - faddx %fp2,%fp0 | ...R+RS(P1+S(P2+SP3)) - - - fadds #0x3F800000,%fp1 | ...1+S(Q1+...) - - fmovel %d1,%fpcr |restore users exceptions - fdivx %fp1,%fp0 |last inst - possible exception set - - bra t_frcinx - -NODD: - fmovex %fp0,%fp1 - fmulx %fp0,%fp0 | ...S = R*R - - fmoved TANQ4,%fp3 - fmoved TANP3,%fp2 - - fmulx %fp0,%fp3 | ...SQ4 - fmulx %fp0,%fp2 | ...SP3 - - faddd TANQ3,%fp3 | ...Q3+SQ4 - faddx TANP2,%fp2 | ...P2+SP3 - - fmulx %fp0,%fp3 | ...S(Q3+SQ4) - fmulx %fp0,%fp2 | ...S(P2+SP3) - - faddx TANQ2,%fp3 | ...Q2+S(Q3+SQ4) - faddx TANP1,%fp2 | ...P1+S(P2+SP3) - - fmulx %fp0,%fp3 | ...S(Q2+S(Q3+SQ4)) - fmulx %fp0,%fp2 | ...S(P1+S(P2+SP3)) - - faddx TANQ1,%fp3 | ...Q1+S(Q2+S(Q3+SQ4)) - fmulx %fp1,%fp2 | ...RS(P1+S(P2+SP3)) - - fmulx %fp3,%fp0 | ...S(Q1+S(Q2+S(Q3+SQ4))) - - - faddx %fp2,%fp1 | ...R+RS(P1+S(P2+SP3)) - fadds #0x3F800000,%fp0 | ...1+S(Q1+...) - - - fmovex %fp1,-(%sp) - eoril #0x80000000,(%sp) - - fmovel %d1,%fpcr |restore users exceptions - fdivx (%sp)+,%fp0 |last inst - possible exception set - - bra t_frcinx - -TANBORS: -|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 
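TANMAIN above fetches Y1 (extended) and Y2 (single) from PITBL and forms r = (X - Y1) - Y2, the classic lead/trail reduction that the naive single subtraction in the previous sketch loses bits to. A double-precision C sketch of that step; the hi/lo split of pi/2 below is the standard fdlibm-style double constant pair, not a value taken from PITBL, and the subtraction stays accurate only for modest N.

#include <math.h>

static const double PIO2_HI = 1.57079632679489655800e+00;  /* lead  */
static const double PIO2_LO = 6.12323399573676603587e-17;  /* trail */

/* r = (X - Y1) - Y2: the lead subtraction cancels almost exactly,
 * and the trail subtraction restores the pi/2 bits the lead part
 * had to drop. */
double reduce_sketch(double x, int *odd)
{
    int n = (int)nearbyint(x * M_2_PI);
    double r = (x - n * PIO2_HI) - n * PIO2_LO;
    *odd = n & 1;
    return r;
}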
-|--IF |X| < 2**(-40), RETURN X OR 1. - cmpil #0x3FFF8000,%d0 - bgts REDUCEX - -TANSM: - - fmovex %fp0,-(%sp) - fmovel %d1,%fpcr |restore users exceptions - fmovex (%sp)+,%fp0 |last inst - possible exception set - - bra t_frcinx - - -REDUCEX: -|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. -|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING -|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. - - fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 - movel %d2,-(%a7) - fmoves #0x00000000,%fp1 - -|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that -|--there is a danger of unwanted overflow in first LOOP iteration. In this -|--case, reduce argument by one remainder step to make subsequent reduction -|--safe. - cmpil #0x7ffeffff,%d0 |is argument dangerously large? - bnes LOOP - movel #0x7ffe0000,FP_SCR2(%a6) |yes -| ;create 2**16383*PI/2 - movel #0xc90fdaa2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) - ftstx %fp0 |test sign of argument - movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* -| ;PI/2 at FP_SCR3 - movel #0x85a308d3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) - fblt red_neg - orw #0x8000,FP_SCR2(%a6) |positive arg - orw #0x8000,FP_SCR3(%a6) -red_neg: - faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact - fmovex %fp0,%fp1 |save high result in fp1 - faddx FP_SCR3(%a6),%fp0 |low part of reduction - fsubx %fp0,%fp1 |determine low component of result - faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. - -|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. -|--integer quotient will be stored in N -|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) - -LOOP: - fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 - movew INARG(%a6),%d0 - movel %d0,%a1 | ...save a copy of D0 - andil #0x00007FFF,%d0 - subil #0x00003FFF,%d0 | ...D0 IS K - cmpil #28,%d0 - bles LASTLOOP -CONTLOOP: - subil #27,%d0 | ...D0 IS L := K-27 - movel #0,ENDFLAG(%a6) - bras WORK -LASTLOOP: - clrl %d0 | ...D0 IS L := 0 - movel #1,ENDFLAG(%a6) - -WORK: -|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN -|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. - -|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), -|--2**L * (PIby2_1), 2**L * (PIby2_2) - - movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI - subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) - - movel #0xA2F9836E,FP_SCR1+4(%a6) - movel #0x4E44152A,FP_SCR1+8(%a6) - movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) - - fmovex %fp0,%fp2 - fmulx FP_SCR1(%a6),%fp2 -|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN -|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N -|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT -|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE -|--US THE DESIRED VALUE IN FLOATING POINT. 
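The comment block above describes the standard add/subtract trick for rounding to an integer without leaving the FPU. A double-precision sketch of the same idea; the FPSP adds sign(arg)*2^63 because extended precision carries a 64-bit mantissa, and 2^52 is the corresponding double constant (this assumes |x| is comfortably below 2^51 and round-to-nearest mode).

#include <math.h>

/* At magnitude 2^52 a double's ulp is 1, so adding sign(x)*2^52
 * forces the FPU to round the fraction bits away; subtracting it
 * back leaves the nearest integer. */
double round_to_int_sketch(double x)
{
    double big = copysign(4503599627370496.0, x);  /* sign(x)*2^52   */
    volatile double t = x + big;   /* volatile: stop the compiler    */
    return t - big;                /* from folding the pair away     */
}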
- -|--HIDE SIX CYCLES OF INSTRUCTION - movel %a1,%d2 - swap %d2 - andil #0x80000000,%d2 - oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL - movel %d2,TWOTO63(%a6) - - movel %d0,%d2 - addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) - -|--FP2 IS READY - fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED - -|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - movew %d2,FP_SCR2(%a6) - clrw FP_SCR2+2(%a6) - movel #0xC90FDAA2,FP_SCR2+4(%a6) - clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 - -|--FP2 IS READY - fsubs TWOTO63(%a6),%fp2 | ...FP2 is N - - addil #0x00003FDD,%d0 - movew %d0,FP_SCR3(%a6) - clrw FP_SCR3+2(%a6) - movel #0x85A308D3,FP_SCR3+4(%a6) - clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 - - movel ENDFLAG(%a6),%d0 - -|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -|--P2 = 2**(L) * Piby2_2 - fmovex %fp2,%fp4 - fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 - fmovex %fp2,%fp5 - fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 - fmovex %fp4,%fp3 -|--we want P+p = W+w but |p| <= half ulp of P -|--Then, we need to compute A := R-P and a := r-p - faddx %fp5,%fp3 | ...FP3 is P - fsubx %fp3,%fp4 | ...W-P - - fsubx %fp3,%fp0 | ...FP0 is A := R - P - faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w - - fmovex %fp0,%fp3 | ...FP3 A - fsubx %fp4,%fp1 | ...FP1 is a := r - p - -|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -|--|r| <= half ulp of R. - faddx %fp1,%fp0 | ...FP0 is R := A+a -|--No need to calculate r if this is the last loop - cmpil #0,%d0 - bgt RESTORE - -|--Need to calculate r - fsubx %fp0,%fp3 | ...A-R - faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a - bra LOOP - -RESTORE: - fmovel %fp2,N(%a6) - movel (%a7)+,%d2 - fmovemx (%a7)+,%fp2-%fp5 - - - movel N(%a6),%d0 - rorl #1,%d0 - - - bra TANCONT - - |end diff --git a/arch/m68k/fpsp040/stanh.S b/arch/m68k/fpsp040/stanh.S deleted file mode 100644 index 7e12e59ee8c7cc02f163df53fe157aba7b7777e8..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/stanh.S +++ /dev/null @@ -1,184 +0,0 @@ -| -| stanh.sa 3.1 12/10/90 -| -| The entry point sTanh computes the hyperbolic tangent of -| an input argument; sTanhd does the same except for denormalized -| input. -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The value tanh(X) returned in floating-point register Fp0. -| -| Accuracy and Monotonicity: The returned result is within 3 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program stanh takes approximately 270 cycles. -| -| Algorithm: -| -| TANH -| 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. -| -| 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by -| sgn := sign(X), y := 2|X|, z := expm1(Y), and -| tanh(X) = sgn*( z/(2+z) ). -| Exit. -| -| 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, -| go to 7. -| -| 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. -| -| 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by -| sgn := sign(X), y := 2|X|, z := exp(Y), -| tanh(X) = sgn - [ sgn*2/(1+z) ]. -| Exit. -| -| 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we -| calculate Tanh(X) by -| sgn := sign(X), Tiny := 2**(-126), -| tanh(X) := sgn - sgn*Tiny. -| Exit. -| -| 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|STANH idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - .set X,FP_SCR5 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set SGN,L_SCR3 - - .set V,FP_SCR6 - -BOUNDS1: .long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2 - - |xref t_frcinx - |xref t_extdnrm - |xref setox - |xref setoxm1 - - .global stanhd -stanhd: -|--TANH(X) = X FOR DENORMALIZED X - - bra t_extdnrm - - .global stanh -stanh: - fmovex (%a0),%fp0 | ...LOAD INPUT - - fmovex %fp0,X(%a6) - movel (%a0),%d0 - movew 4(%a0),%d0 - movel %d0,X(%a6) - andl #0x7FFFFFFF,%d0 - cmp2l BOUNDS1(%pc),%d0 | ...2**(-40) < |X| < (5/2)LOG2 ? - bcss TANHBORS - -|--THIS IS THE USUAL CASE -|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). - - movel X(%a6),%d0 - movel %d0,SGN(%a6) - andl #0x7FFF0000,%d0 - addl #0x00010000,%d0 | ...EXPONENT OF 2|X| - movel %d0,X(%a6) - andl #0x80000000,SGN(%a6) - fmovex X(%a6),%fp0 | ...FP0 IS Y = 2|X| - - movel %d1,-(%a7) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setoxm1 | ...FP0 IS Z = EXPM1(Y) - movel (%a7)+,%d1 - - fmovex %fp0,%fp1 - fadds #0x40000000,%fp1 | ...Z+2 - movel SGN(%a6),%d0 - fmovex %fp1,V(%a6) - eorl %d0,V(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fdivx V(%a6),%fp0 - bra t_frcinx - -TANHBORS: - cmpl #0x3FFF8000,%d0 - blt TANHSM - - cmpl #0x40048AA1,%d0 - bgt TANHHUGE - -|-- (5/2) LOG2 < |X| < 50 LOG2, -|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), -|--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. - - movel X(%a6),%d0 - movel %d0,SGN(%a6) - andl #0x7FFF0000,%d0 - addl #0x00010000,%d0 | ...EXPO OF 2|X| - movel %d0,X(%a6) | ...Y = 2|X| - andl #0x80000000,SGN(%a6) - movel SGN(%a6),%d0 - fmovex X(%a6),%fp0 | ...Y = 2|X| - - movel %d1,-(%a7) - clrl %d1 - fmovemx %fp0-%fp0,(%a0) - bsr setox | ...FP0 IS EXP(Y) - movel (%a7)+,%d1 - movel SGN(%a6),%d0 - fadds #0x3F800000,%fp0 | ...EXP(Y)+1 - - eorl #0xC0000000,%d0 | ...-SIGN(X)*2 - fmoves %d0,%fp1 | ...-SIGN(X)*2 IN SGL FMT - fdivx %fp0,%fp1 | ...-SIGN(X)2 / [EXP(Y)+1 ] - - movel SGN(%a6),%d0 - orl #0x3F800000,%d0 | ...SGN - fmoves %d0,%fp0 | ...SGN IN SGL FMT - - fmovel %d1,%FPCR |restore users exceptions - faddx %fp1,%fp0 - - bra t_frcinx - -TANHSM: - movew #0x0000,XDCARE(%a6) - - fmovel %d1,%FPCR |restore users exceptions - fmovex X(%a6),%fp0 |last inst - possible exception set - - bra t_frcinx - -TANHHUGE: -|---RETURN SGN(X) - SGN(X)EPS - movel X(%a6),%d0 - andl #0x80000000,%d0 - orl #0x3F800000,%d0 - fmoves %d0,%fp0 - andl #0x80000000,%d0 - eorl #0x80800000,%d0 | ...-SIGN(X)*EPS - - fmovel %d1,%FPCR |restore users exceptions - fadds %d0,%fp0 - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/sto_res.S b/arch/m68k/fpsp040/sto_res.S deleted file mode 100644 index 484b47d4eaad3038a29a3094743f9ad7e5a0bd29..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/sto_res.S +++ /dev/null @@ -1,97 +0,0 @@ -| -| sto_res.sa 3.1 12/10/90 -| -| Takes the result and puts it in where the user expects it. -| Library functions return result in fp0. If fp0 is not the -| users destination register then fp0 is moved to the -| correct floating-point destination register. fp0 and fp1 -| are then restored to the original contents. -| -| Input: result in fp0,fp1 -| -| d2 & a0 should be kept unmodified -| -| Output: moves the result to the true destination reg or mem -| -| Modifies: destination floating point register -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -STO_RES: |idnt 2,1 | Motorola 040 Floating Point Software Package - - - |section 8 - -#include "fpsp.h" - - .global sto_cos -sto_cos: - bfextu CMDREG1B(%a6){#13:#3},%d0 |extract cos destination - cmpib #3,%d0 |check for fp0/fp1 cases - bles c_fp0123 - fmovemx %fp1-%fp1,-(%a7) - moveql #7,%d1 - subl %d0,%d1 |d1 = 7- (dest. reg. no.) - clrl %d0 - bsetl %d1,%d0 |d0 is dynamic register mask - fmovemx (%a7)+,%d0 - rts -c_fp0123: - cmpib #0,%d0 - beqs c_is_fp0 - cmpib #1,%d0 - beqs c_is_fp1 - cmpib #2,%d0 - beqs c_is_fp2 -c_is_fp3: - fmovemx %fp1-%fp1,USER_FP3(%a6) - rts -c_is_fp2: - fmovemx %fp1-%fp1,USER_FP2(%a6) - rts -c_is_fp1: - fmovemx %fp1-%fp1,USER_FP1(%a6) - rts -c_is_fp0: - fmovemx %fp1-%fp1,USER_FP0(%a6) - rts - - - .global sto_res -sto_res: - bfextu CMDREG1B(%a6){#6:#3},%d0 |extract destination register - cmpib #3,%d0 |check for fp0/fp1 cases - bles fp0123 - fmovemx %fp0-%fp0,-(%a7) - moveql #7,%d1 - subl %d0,%d1 |d1 = 7- (dest. reg. no.) - clrl %d0 - bsetl %d1,%d0 |d0 is dynamic register mask - fmovemx (%a7)+,%d0 - rts -fp0123: - cmpib #0,%d0 - beqs is_fp0 - cmpib #1,%d0 - beqs is_fp1 - cmpib #2,%d0 - beqs is_fp2 -is_fp3: - fmovemx %fp0-%fp0,USER_FP3(%a6) - rts -is_fp2: - fmovemx %fp0-%fp0,USER_FP2(%a6) - rts -is_fp1: - fmovemx %fp0-%fp0,USER_FP1(%a6) - rts -is_fp0: - fmovemx %fp0-%fp0,USER_FP0(%a6) - rts - - |end diff --git a/arch/m68k/fpsp040/stwotox.S b/arch/m68k/fpsp040/stwotox.S deleted file mode 100644 index 0d5e6a1436a638c59f0fc123974f43c418169d69..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/stwotox.S +++ /dev/null @@ -1,426 +0,0 @@ -| -| stwotox.sa 3.1 12/10/90 -| -| stwotox --- 2**X -| stwotoxd --- 2**X for denormalized X -| stentox --- 10**X -| stentoxd --- 10**X for denormalized X -| -| Input: Double-extended number X in location pointed to -| by address register a0. -| -| Output: The function values are returned in Fp0. -| -| Accuracy and Monotonicity: The returned result is within 2 ulps in -| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the -| result is subsequently rounded to double precision. The -| result is provably monotonic in double precision. -| -| Speed: The program stwotox takes approximately 190 cycles and the -| program stentox takes approximately 200 cycles. -| -| Algorithm: -| -| twotox -| 1. If |X| > 16480, go to ExpBig. -| -| 2. If |X| < 2**(-70), go to ExpSm. -| -| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore -| decompose N as -| N = 64(M + M') + j, j = 0,1,2,...,63. -| -| 4. Overwrite r := r * log2. Then -| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). -| Go to expr to compute that expression. -| -| tentox -| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. -| -| 2. If |X| < 2**(-70), go to ExpSm. -| -| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set -| N := round-to-int(y). Decompose N as -| N = 64(M + M') + j, j = 0,1,2,...,63. -| -| 4. Define r as -| r := ((X - N*L1)-N*L2) * L10 -| where L1, L2 are the leading and trailing parts of log_10(2)/64 -| and L10 is the natural log of 10. Then -| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). -| Go to expr to compute that expression. -| -| expr -| 1. Fetch 2**(j/64) from table as Fact1 and Fact2. -| -| 2. Overwrite Fact1 and Fact2 by -| Fact1 := 2**(M) * Fact1 -| Fact2 := 2**(M) * Fact2 -| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). -| -| 3. 
Calculate P where 1 + P approximates exp(r): -| P = r + r*r*(A1+r*(A2+...+r*A5)). -| -| 4. Let AdjFact := 2**(M'). Return -| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). -| Exit. -| -| ExpBig -| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate -| underflow by Tiny * Tiny. -| -| ExpSm -| 1. Return 1 + X. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - -BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480 -BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10 - -L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2 -L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10 - -L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 - -LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 - -LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -EXPA5: .long 0x3F56C16D,0x6F7BD0B2 -EXPA4: .long 0x3F811112,0x302C712C -EXPA3: .long 0x3FA55555,0x55554CC1 -EXPA2: .long 0x3FC55555,0x55554A54 -EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000 - -HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 -TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000 - -EXPTBL: - .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 - .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA - .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 - .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 - .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA - .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C - .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 - .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA - .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 - .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 - .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 - .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 - .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D - .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 - .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B - .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 - .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A - .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B - .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF - .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA - .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD - .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E - .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B - .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB - .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB - .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 - .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C - .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 - .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 - .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 - .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F - .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C - .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB - .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB - .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C - .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA - .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD - .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 - .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A - .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 - .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB - .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 - .long 
0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C - .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 - .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 - .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE - .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 - .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 - .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A - .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 - .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 - .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 - .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 - .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE - .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 - .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F - .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A - .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A - .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC - .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F - .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A - .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 - .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B - .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 - - .set N,L_SCR1 - - .set X,FP_SCR1 - .set XDCARE,X+2 - .set XFRAC,X+4 - - .set ADJFACT,FP_SCR2 - - .set FACT1,FP_SCR3 - .set FACT1HI,FACT1+4 - .set FACT1LOW,FACT1+8 - - .set FACT2,FP_SCR4 - .set FACT2HI,FACT2+4 - .set FACT2LOW,FACT2+8 - - | xref t_unfl - |xref t_ovfl - |xref t_frcinx - - .global stwotoxd -stwotoxd: -|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT - - fmovel %d1,%fpcr | ...set user's rounding mode/precision - fmoves #0x3F800000,%fp0 | ...RETURN 1 + X - movel (%a0),%d0 - orl #0x00800001,%d0 - fadds %d0,%fp0 - bra t_frcinx - - .global stwotox -stwotox: -|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? - bges TWOOK1 - bra EXPBORS - -TWOOK1: - cmpil #0x400D80C0,%d0 | ...|X| > 16480? - bles TWOMAIN - bra EXPBORS - - -TWOMAIN: -|--USUAL CASE, 2^(-70) <= |X| <= 16480 - - fmovex %fp0,%fp1 - fmuls #0x42800000,%fp1 | ...64 * X - - fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X) - movel %d2,-(%sp) - lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) - fmovel N(%a6),%fp1 | ...N --> FLOATING FMT - movel N(%a6),%d0 - movel %d0,%d2 - andil #0x3F,%d0 | ...D0 IS J - asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) - addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) - asrl #6,%d2 | ...d2 IS L, N = 64L + J - movel %d2,%d0 - asrl #1,%d0 | ...D0 IS M - subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J - addil #0x3FFF,%d2 - movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') - movel (%sp)+,%d2 -|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -|--ADJFACT = 2^(M'). -|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
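TWOMAIN above implements step 3 of the twotox algorithm: N = round(64X) is split as 64M + J, 2^(J/64) comes from EXPTBL as a lead/trail pair, and r = (X - N/64)*log2 feeds the exp polynomial. A double-precision C sketch of the decomposition; exp2() computes 2^(J/64) on the fly where the table stores it, exp() stands in for the 1+P polynomial, and ldexp() absorbs both M and M' (the FPSP splits the scale factor in two only to avoid intermediate over/underflow).

#include <math.h>

double two_to_x_sketch(double x)
{
    int n = (int)nearbyint(64.0 * x);     /* N = round-to-int(64 X)  */
    int j = n & 63;                       /* J = table index, 0..63  */
    int m = (n - j) / 64;                 /* N = 64*M + J, exactly   */

    double r    = (x - n / 64.0) * M_LN2; /* |r| <= (1/128) log2     */
    double fact = exp2(j / 64.0);         /* 2^(J/64), cf. EXPTBL    */

    return ldexp(fact * exp(r), m);       /* 2^(M+J/64) * exp(r)     */
}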
- - fmuls #0x3C800000,%fp1 | ...(1/64)*N - movel (%a1)+,FACT1(%a6) - movel (%a1)+,FACT1HI(%a6) - movel (%a1)+,FACT1LOW(%a6) - movew (%a1)+,FACT2(%a6) - clrw FACT2+2(%a6) - - fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X) - - movew (%a1)+,FACT2HI(%a6) - clrw FACT2HI+2(%a6) - clrl FACT2LOW(%a6) - addw %d0,FACT1(%a6) - - fmulx LOG2,%fp0 | ...FP0 IS R - addw %d0,FACT2(%a6) - - bra expr - -EXPBORS: -|--FPCR, D0 SAVED - cmpil #0x3FFF8000,%d0 - bgts EXPBIG - -EXPSM: -|--|X| IS SMALL, RETURN 1 + X - - fmovel %d1,%FPCR |restore users exceptions - fadds #0x3F800000,%fp0 | ...RETURN 1 + X - - bra t_frcinx - -EXPBIG: -|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW -|--REGISTERS SAVE SO FAR ARE FPCR AND D0 - movel X(%a6),%d0 - cmpil #0,%d0 - blts EXPNEG - - bclrb #7,(%a0) |t_ovfl expects positive value - bra t_ovfl - -EXPNEG: - bclrb #7,(%a0) |t_unfl expects positive value - bra t_unfl - - .global stentoxd -stentoxd: -|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT - - fmovel %d1,%fpcr | ...set user's rounding mode/precision - fmoves #0x3F800000,%fp0 | ...RETURN 1 + X - movel (%a0),%d0 - orl #0x00800001,%d0 - fadds %d0,%fp0 - bra t_frcinx - - .global stentox -stentox: -|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S - fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's - - movel (%a0),%d0 - movew 4(%a0),%d0 - fmovex %fp0,X(%a6) - andil #0x7FFFFFFF,%d0 - - cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)? - bges TENOK1 - bra EXPBORS - -TENOK1: - cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ? - bles TENMAIN - bra EXPBORS - -TENMAIN: -|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 - - fmovex %fp0,%fp1 - fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2 - - fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2) - movel %d2,-(%sp) - lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64) - fmovel N(%a6),%fp1 | ...N --> FLOATING FMT - movel N(%a6),%d0 - movel %d0,%d2 - andil #0x3F,%d0 | ...D0 IS J - asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64) - addal %d0,%a1 | ...ADDRESS FOR 2^(J/64) - asrl #6,%d2 | ...d2 IS L, N = 64L + J - movel %d2,%d0 - asrl #1,%d0 | ...D0 IS M - subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J - addil #0x3FFF,%d2 - movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M') - movel (%sp)+,%d2 - -|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -|--ADJFACT = 2^(M'). -|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. - - fmovex %fp1,%fp2 - - fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD - movel (%a1)+,FACT1(%a6) - - fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL - - movel (%a1)+,FACT1HI(%a6) - movel (%a1)+,FACT1LOW(%a6) - fsubx %fp1,%fp0 | ...X - N L_LEAD - movew (%a1)+,FACT2(%a6) - - fsubx %fp2,%fp0 | ...X - N L_TRAIL - - clrw FACT2+2(%a6) - movew (%a1)+,FACT2HI(%a6) - clrw FACT2HI+2(%a6) - clrl FACT2LOW(%a6) - - fmulx LOG10,%fp0 | ...FP0 IS R - - addw %d0,FACT1(%a6) - addw %d0,FACT2(%a6) - -expr: -|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. -|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). -|--FP0 IS R. 
THE FOLLOWING CODE COMPUTES -|-- 2**(M'+M) * 2**(J/64) * EXP(R) - - fmovex %fp0,%fp1 - fmulx %fp1,%fp1 | ...FP1 IS S = R*R - - fmoved EXPA5,%fp2 | ...FP2 IS A5 - fmoved EXPA4,%fp3 | ...FP3 IS A4 - - fmulx %fp1,%fp2 | ...FP2 IS S*A5 - fmulx %fp1,%fp3 | ...FP3 IS S*A4 - - faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5 - faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4 - - fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5) - fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4) - - faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5) - fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4) - - fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5)) - faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4) - - faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1 - - -|--FINAL RECONSTRUCTION PROCESS -|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) - - fmulx FACT1(%a6),%fp0 - faddx FACT2(%a6),%fp0 - faddx FACT1(%a6),%fp0 - - fmovel %d1,%FPCR |restore users exceptions - clrw ADJFACT+2(%a6) - movel #0x80000000,ADJFACT+4(%a6) - clrl ADJFACT+8(%a6) - fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT - - bra t_frcinx - - |end diff --git a/arch/m68k/fpsp040/tbldo.S b/arch/m68k/fpsp040/tbldo.S deleted file mode 100644 index fd5c37a5a2b97cc24de7bfc0892ca79e54226d04..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/tbldo.S +++ /dev/null @@ -1,553 +0,0 @@ -| -| tbldo.sa 3.1 12/10/90 -| -| Modified: -| 8/16/90 chinds The table was constructed to use only one level -| of indirection in do_func for monadic -| functions. Dyadic functions require two -| levels, and the tables are still contained -| in do_func. The table is arranged for -| index with a 10-bit index, with the first -| 7 bits the opcode, and the remaining 3 -| the stag. For dyadic functions, all -| valid addresses are to the generic entry -| point. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
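The header above describes a flat jump table indexed by ten bits: seven opcode bits from the instruction's extension word and three stag bits classifying the source operand. A C sketch of that dispatch shape, not FPSP code; the handler names mirror the fsinh row ($02) of tblpre below, and the stag ordering follows the norm/zero/inf/nan/denorm convention of the table comments.

typedef void (*fp_handler)(void);

enum stag { NORM, ZERO, INF, NAN_OP, DENORM };  /* tags 5..7: error */

extern void ssinh(void), szero(void), sinf(void),
            src_nan(void), ssinhd(void), serror(void);

/* One 8-entry row per 7-bit opcode; row $02 is fsinh, as below. */
static const fp_handler tbl[0x38 << 3] = {
    [(0x02 << 3) | NORM]   = ssinh,
    [(0x02 << 3) | ZERO]   = szero,
    [(0x02 << 3) | INF]    = sinf,
    [(0x02 << 3) | NAN_OP] = src_nan,
    [(0x02 << 3) | DENORM] = ssinhd,
    /* ...remaining rows; empty slots fall back to serror... */
};

static void dispatch(unsigned opcode, enum stag tag)
{
    fp_handler h = tbl[(opcode << 3) | tag];    /* one indirection */
    (h ? h : serror)();
}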
- -|TBLDO idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - - |xref ld_pinf,ld_pone,ld_ppi2 - |xref t_dz2,t_operr - |xref serror,sone,szero,sinf,snzrinx - |xref sopr_inf,spi_2,src_nan,szr_inf - - |xref smovcr - |xref pmod,prem,pscale - |xref satanh,satanhd - |xref sacos,sacosd,sasin,sasind,satan,satand - |xref setox,setoxd,setoxm1,setoxm1d,setoxm1i - |xref sgetexp,sgetexpd,sgetman,sgetmand - |xref sint,sintd,sintrz - |xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz - |xref scos,scosd,ssin,ssind,stan,stand - |xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd - |xref sslog10,sslog2,sslogn,sslognp1 - |xref sslog10d,sslog2d,sslognd,slognp1d - |xref stentox,stentoxd,stwotox,stwotoxd - -| instruction ;opcode-stag Notes - .global tblpre -tblpre: - .long smovcr |$00-0 fmovecr all - .long smovcr |$00-1 fmovecr all - .long smovcr |$00-2 fmovecr all - .long smovcr |$00-3 fmovecr all - .long smovcr |$00-4 fmovecr all - .long smovcr |$00-5 fmovecr all - .long smovcr |$00-6 fmovecr all - .long smovcr |$00-7 fmovecr all - - .long sint |$01-0 fint norm - .long szero |$01-1 fint zero - .long sinf |$01-2 fint inf - .long src_nan |$01-3 fint nan - .long sintd |$01-4 fint denorm inx - .long serror |$01-5 fint ERROR - .long serror |$01-6 fint ERROR - .long serror |$01-7 fint ERROR - - .long ssinh |$02-0 fsinh norm - .long szero |$02-1 fsinh zero - .long sinf |$02-2 fsinh inf - .long src_nan |$02-3 fsinh nan - .long ssinhd |$02-4 fsinh denorm - .long serror |$02-5 fsinh ERROR - .long serror |$02-6 fsinh ERROR - .long serror |$02-7 fsinh ERROR - - .long sintrz |$03-0 fintrz norm - .long szero |$03-1 fintrz zero - .long sinf |$03-2 fintrz inf - .long src_nan |$03-3 fintrz nan - .long snzrinx |$03-4 fintrz denorm inx - .long serror |$03-5 fintrz ERROR - .long serror |$03-6 fintrz ERROR - .long serror |$03-7 fintrz ERROR - - .long serror |$04-0 ERROR - illegal extension - .long serror |$04-1 ERROR - illegal extension - .long serror |$04-2 ERROR - illegal extension - .long serror |$04-3 ERROR - illegal extension - .long serror |$04-4 ERROR - illegal extension - .long serror |$04-5 ERROR - illegal extension - .long serror |$04-6 ERROR - illegal extension - .long serror |$04-7 ERROR - illegal extension - - .long serror |$05-0 ERROR - illegal extension - .long serror |$05-1 ERROR - illegal extension - .long serror |$05-2 ERROR - illegal extension - .long serror |$05-3 ERROR - illegal extension - .long serror |$05-4 ERROR - illegal extension - .long serror |$05-5 ERROR - illegal extension - .long serror |$05-6 ERROR - illegal extension - .long serror |$05-7 ERROR - illegal extension - - .long sslognp1 |$06-0 flognp1 norm - .long szero |$06-1 flognp1 zero - .long sopr_inf |$06-2 flognp1 inf - .long src_nan |$06-3 flognp1 nan - .long slognp1d |$06-4 flognp1 denorm - .long serror |$06-5 flognp1 ERROR - .long serror |$06-6 flognp1 ERROR - .long serror |$06-7 flognp1 ERROR - - .long serror |$07-0 ERROR - illegal extension - .long serror |$07-1 ERROR - illegal extension - .long serror |$07-2 ERROR - illegal extension - .long serror |$07-3 ERROR - illegal extension - .long serror |$07-4 ERROR - illegal extension - .long serror |$07-5 ERROR - illegal extension - .long serror |$07-6 ERROR - illegal extension - .long serror |$07-7 ERROR - illegal extension - - .long setoxm1 |$08-0 fetoxm1 norm - .long szero |$08-1 fetoxm1 zero - .long setoxm1i |$08-2 fetoxm1 inf - .long src_nan |$08-3 fetoxm1 nan - .long setoxm1d |$08-4 fetoxm1 denorm - .long serror |$08-5 fetoxm1 ERROR - .long serror |$08-6 fetoxm1 ERROR - 
.long serror |$08-7 fetoxm1 ERROR - - .long stanh |$09-0 ftanh norm - .long szero |$09-1 ftanh zero - .long sone |$09-2 ftanh inf - .long src_nan |$09-3 ftanh nan - .long stanhd |$09-4 ftanh denorm - .long serror |$09-5 ftanh ERROR - .long serror |$09-6 ftanh ERROR - .long serror |$09-7 ftanh ERROR - - .long satan |$0a-0 fatan norm - .long szero |$0a-1 fatan zero - .long spi_2 |$0a-2 fatan inf - .long src_nan |$0a-3 fatan nan - .long satand |$0a-4 fatan denorm - .long serror |$0a-5 fatan ERROR - .long serror |$0a-6 fatan ERROR - .long serror |$0a-7 fatan ERROR - - .long serror |$0b-0 ERROR - illegal extension - .long serror |$0b-1 ERROR - illegal extension - .long serror |$0b-2 ERROR - illegal extension - .long serror |$0b-3 ERROR - illegal extension - .long serror |$0b-4 ERROR - illegal extension - .long serror |$0b-5 ERROR - illegal extension - .long serror |$0b-6 ERROR - illegal extension - .long serror |$0b-7 ERROR - illegal extension - - .long sasin |$0c-0 fasin norm - .long szero |$0c-1 fasin zero - .long t_operr |$0c-2 fasin inf - .long src_nan |$0c-3 fasin nan - .long sasind |$0c-4 fasin denorm - .long serror |$0c-5 fasin ERROR - .long serror |$0c-6 fasin ERROR - .long serror |$0c-7 fasin ERROR - - .long satanh |$0d-0 fatanh norm - .long szero |$0d-1 fatanh zero - .long t_operr |$0d-2 fatanh inf - .long src_nan |$0d-3 fatanh nan - .long satanhd |$0d-4 fatanh denorm - .long serror |$0d-5 fatanh ERROR - .long serror |$0d-6 fatanh ERROR - .long serror |$0d-7 fatanh ERROR - - .long ssin |$0e-0 fsin norm - .long szero |$0e-1 fsin zero - .long t_operr |$0e-2 fsin inf - .long src_nan |$0e-3 fsin nan - .long ssind |$0e-4 fsin denorm - .long serror |$0e-5 fsin ERROR - .long serror |$0e-6 fsin ERROR - .long serror |$0e-7 fsin ERROR - - .long stan |$0f-0 ftan norm - .long szero |$0f-1 ftan zero - .long t_operr |$0f-2 ftan inf - .long src_nan |$0f-3 ftan nan - .long stand |$0f-4 ftan denorm - .long serror |$0f-5 ftan ERROR - .long serror |$0f-6 ftan ERROR - .long serror |$0f-7 ftan ERROR - - .long setox |$10-0 fetox norm - .long ld_pone |$10-1 fetox zero - .long szr_inf |$10-2 fetox inf - .long src_nan |$10-3 fetox nan - .long setoxd |$10-4 fetox denorm - .long serror |$10-5 fetox ERROR - .long serror |$10-6 fetox ERROR - .long serror |$10-7 fetox ERROR - - .long stwotox |$11-0 ftwotox norm - .long ld_pone |$11-1 ftwotox zero - .long szr_inf |$11-2 ftwotox inf - .long src_nan |$11-3 ftwotox nan - .long stwotoxd |$11-4 ftwotox denorm - .long serror |$11-5 ftwotox ERROR - .long serror |$11-6 ftwotox ERROR - .long serror |$11-7 ftwotox ERROR - - .long stentox |$12-0 ftentox norm - .long ld_pone |$12-1 ftentox zero - .long szr_inf |$12-2 ftentox inf - .long src_nan |$12-3 ftentox nan - .long stentoxd |$12-4 ftentox denorm - .long serror |$12-5 ftentox ERROR - .long serror |$12-6 ftentox ERROR - .long serror |$12-7 ftentox ERROR - - .long serror |$13-0 ERROR - illegal extension - .long serror |$13-1 ERROR - illegal extension - .long serror |$13-2 ERROR - illegal extension - .long serror |$13-3 ERROR - illegal extension - .long serror |$13-4 ERROR - illegal extension - .long serror |$13-5 ERROR - illegal extension - .long serror |$13-6 ERROR - illegal extension - .long serror |$13-7 ERROR - illegal extension - - .long sslogn |$14-0 flogn norm - .long t_dz2 |$14-1 flogn zero - .long sopr_inf |$14-2 flogn inf - .long src_nan |$14-3 flogn nan - .long sslognd |$14-4 flogn denorm - .long serror |$14-5 flogn ERROR - .long serror |$14-6 flogn ERROR - .long serror |$14-7 flogn ERROR - - .long sslog10 |$15-0 
flog10 norm - .long t_dz2 |$15-1 flog10 zero - .long sopr_inf |$15-2 flog10 inf - .long src_nan |$15-3 flog10 nan - .long sslog10d |$15-4 flog10 denorm - .long serror |$15-5 flog10 ERROR - .long serror |$15-6 flog10 ERROR - .long serror |$15-7 flog10 ERROR - - .long sslog2 |$16-0 flog2 norm - .long t_dz2 |$16-1 flog2 zero - .long sopr_inf |$16-2 flog2 inf - .long src_nan |$16-3 flog2 nan - .long sslog2d |$16-4 flog2 denorm - .long serror |$16-5 flog2 ERROR - .long serror |$16-6 flog2 ERROR - .long serror |$16-7 flog2 ERROR - - .long serror |$17-0 ERROR - illegal extension - .long serror |$17-1 ERROR - illegal extension - .long serror |$17-2 ERROR - illegal extension - .long serror |$17-3 ERROR - illegal extension - .long serror |$17-4 ERROR - illegal extension - .long serror |$17-5 ERROR - illegal extension - .long serror |$17-6 ERROR - illegal extension - .long serror |$17-7 ERROR - illegal extension - - .long serror |$18-0 ERROR - illegal extension - .long serror |$18-1 ERROR - illegal extension - .long serror |$18-2 ERROR - illegal extension - .long serror |$18-3 ERROR - illegal extension - .long serror |$18-4 ERROR - illegal extension - .long serror |$18-5 ERROR - illegal extension - .long serror |$18-6 ERROR - illegal extension - .long serror |$18-7 ERROR - illegal extension - - .long scosh |$19-0 fcosh norm - .long ld_pone |$19-1 fcosh zero - .long ld_pinf |$19-2 fcosh inf - .long src_nan |$19-3 fcosh nan - .long scoshd |$19-4 fcosh denorm - .long serror |$19-5 fcosh ERROR - .long serror |$19-6 fcosh ERROR - .long serror |$19-7 fcosh ERROR - - .long serror |$1a-0 ERROR - illegal extension - .long serror |$1a-1 ERROR - illegal extension - .long serror |$1a-2 ERROR - illegal extension - .long serror |$1a-3 ERROR - illegal extension - .long serror |$1a-4 ERROR - illegal extension - .long serror |$1a-5 ERROR - illegal extension - .long serror |$1a-6 ERROR - illegal extension - .long serror |$1a-7 ERROR - illegal extension - - .long serror |$1b-0 ERROR - illegal extension - .long serror |$1b-1 ERROR - illegal extension - .long serror |$1b-2 ERROR - illegal extension - .long serror |$1b-3 ERROR - illegal extension - .long serror |$1b-4 ERROR - illegal extension - .long serror |$1b-5 ERROR - illegal extension - .long serror |$1b-6 ERROR - illegal extension - .long serror |$1b-7 ERROR - illegal extension - - .long sacos |$1c-0 facos norm - .long ld_ppi2 |$1c-1 facos zero - .long t_operr |$1c-2 facos inf - .long src_nan |$1c-3 facos nan - .long sacosd |$1c-4 facos denorm - .long serror |$1c-5 facos ERROR - .long serror |$1c-6 facos ERROR - .long serror |$1c-7 facos ERROR - - .long scos |$1d-0 fcos norm - .long ld_pone |$1d-1 fcos zero - .long t_operr |$1d-2 fcos inf - .long src_nan |$1d-3 fcos nan - .long scosd |$1d-4 fcos denorm - .long serror |$1d-5 fcos ERROR - .long serror |$1d-6 fcos ERROR - .long serror |$1d-7 fcos ERROR - - .long sgetexp |$1e-0 fgetexp norm - .long szero |$1e-1 fgetexp zero - .long t_operr |$1e-2 fgetexp inf - .long src_nan |$1e-3 fgetexp nan - .long sgetexpd |$1e-4 fgetexp denorm - .long serror |$1e-5 fgetexp ERROR - .long serror |$1e-6 fgetexp ERROR - .long serror |$1e-7 fgetexp ERROR - - .long sgetman |$1f-0 fgetman norm - .long szero |$1f-1 fgetman zero - .long t_operr |$1f-2 fgetman inf - .long src_nan |$1f-3 fgetman nan - .long sgetmand |$1f-4 fgetman denorm - .long serror |$1f-5 fgetman ERROR - .long serror |$1f-6 fgetman ERROR - .long serror |$1f-7 fgetman ERROR - - .long serror |$20-0 ERROR - illegal extension - .long serror |$20-1 ERROR - illegal extension - 
.long serror |$20-2 ERROR - illegal extension - .long serror |$20-3 ERROR - illegal extension - .long serror |$20-4 ERROR - illegal extension - .long serror |$20-5 ERROR - illegal extension - .long serror |$20-6 ERROR - illegal extension - .long serror |$20-7 ERROR - illegal extension - - .long pmod |$21-0 fmod all - .long pmod |$21-1 fmod all - .long pmod |$21-2 fmod all - .long pmod |$21-3 fmod all - .long pmod |$21-4 fmod all - .long serror |$21-5 fmod ERROR - .long serror |$21-6 fmod ERROR - .long serror |$21-7 fmod ERROR - - .long serror |$22-0 ERROR - illegal extension - .long serror |$22-1 ERROR - illegal extension - .long serror |$22-2 ERROR - illegal extension - .long serror |$22-3 ERROR - illegal extension - .long serror |$22-4 ERROR - illegal extension - .long serror |$22-5 ERROR - illegal extension - .long serror |$22-6 ERROR - illegal extension - .long serror |$22-7 ERROR - illegal extension - - .long serror |$23-0 ERROR - illegal extension - .long serror |$23-1 ERROR - illegal extension - .long serror |$23-2 ERROR - illegal extension - .long serror |$23-3 ERROR - illegal extension - .long serror |$23-4 ERROR - illegal extension - .long serror |$23-5 ERROR - illegal extension - .long serror |$23-6 ERROR - illegal extension - .long serror |$23-7 ERROR - illegal extension - - .long serror |$24-0 ERROR - illegal extension - .long serror |$24-1 ERROR - illegal extension - .long serror |$24-2 ERROR - illegal extension - .long serror |$24-3 ERROR - illegal extension - .long serror |$24-4 ERROR - illegal extension - .long serror |$24-5 ERROR - illegal extension - .long serror |$24-6 ERROR - illegal extension - .long serror |$24-7 ERROR - illegal extension - - .long prem |$25-0 frem all - .long prem |$25-1 frem all - .long prem |$25-2 frem all - .long prem |$25-3 frem all - .long prem |$25-4 frem all - .long serror |$25-5 frem ERROR - .long serror |$25-6 frem ERROR - .long serror |$25-7 frem ERROR - - .long pscale |$26-0 fscale all - .long pscale |$26-1 fscale all - .long pscale |$26-2 fscale all - .long pscale |$26-3 fscale all - .long pscale |$26-4 fscale all - .long serror |$26-5 fscale ERROR - .long serror |$26-6 fscale ERROR - .long serror |$26-7 fscale ERROR - - .long serror |$27-0 ERROR - illegal extension - .long serror |$27-1 ERROR - illegal extension - .long serror |$27-2 ERROR - illegal extension - .long serror |$27-3 ERROR - illegal extension - .long serror |$27-4 ERROR - illegal extension - .long serror |$27-5 ERROR - illegal extension - .long serror |$27-6 ERROR - illegal extension - .long serror |$27-7 ERROR - illegal extension - - .long serror |$28-0 ERROR - illegal extension - .long serror |$28-1 ERROR - illegal extension - .long serror |$28-2 ERROR - illegal extension - .long serror |$28-3 ERROR - illegal extension - .long serror |$28-4 ERROR - illegal extension - .long serror |$28-5 ERROR - illegal extension - .long serror |$28-6 ERROR - illegal extension - .long serror |$28-7 ERROR - illegal extension - - .long serror |$29-0 ERROR - illegal extension - .long serror |$29-1 ERROR - illegal extension - .long serror |$29-2 ERROR - illegal extension - .long serror |$29-3 ERROR - illegal extension - .long serror |$29-4 ERROR - illegal extension - .long serror |$29-5 ERROR - illegal extension - .long serror |$29-6 ERROR - illegal extension - .long serror |$29-7 ERROR - illegal extension - - .long serror |$2a-0 ERROR - illegal extension - .long serror |$2a-1 ERROR - illegal extension - .long serror |$2a-2 ERROR - illegal extension - .long serror |$2a-3 ERROR - illegal 
extension - .long serror |$2a-4 ERROR - illegal extension - .long serror |$2a-5 ERROR - illegal extension - .long serror |$2a-6 ERROR - illegal extension - .long serror |$2a-7 ERROR - illegal extension - - .long serror |$2b-0 ERROR - illegal extension - .long serror |$2b-1 ERROR - illegal extension - .long serror |$2b-2 ERROR - illegal extension - .long serror |$2b-3 ERROR - illegal extension - .long serror |$2b-4 ERROR - illegal extension - .long serror |$2b-5 ERROR - illegal extension - .long serror |$2b-6 ERROR - illegal extension - .long serror |$2b-7 ERROR - illegal extension - - .long serror |$2c-0 ERROR - illegal extension - .long serror |$2c-1 ERROR - illegal extension - .long serror |$2c-2 ERROR - illegal extension - .long serror |$2c-3 ERROR - illegal extension - .long serror |$2c-4 ERROR - illegal extension - .long serror |$2c-5 ERROR - illegal extension - .long serror |$2c-6 ERROR - illegal extension - .long serror |$2c-7 ERROR - illegal extension - - .long serror |$2d-0 ERROR - illegal extension - .long serror |$2d-1 ERROR - illegal extension - .long serror |$2d-2 ERROR - illegal extension - .long serror |$2d-3 ERROR - illegal extension - .long serror |$2d-4 ERROR - illegal extension - .long serror |$2d-5 ERROR - illegal extension - .long serror |$2d-6 ERROR - illegal extension - .long serror |$2d-7 ERROR - illegal extension - - .long serror |$2e-0 ERROR - illegal extension - .long serror |$2e-1 ERROR - illegal extension - .long serror |$2e-2 ERROR - illegal extension - .long serror |$2e-3 ERROR - illegal extension - .long serror |$2e-4 ERROR - illegal extension - .long serror |$2e-5 ERROR - illegal extension - .long serror |$2e-6 ERROR - illegal extension - .long serror |$2e-7 ERROR - illegal extension - - .long serror |$2f-0 ERROR - illegal extension - .long serror |$2f-1 ERROR - illegal extension - .long serror |$2f-2 ERROR - illegal extension - .long serror |$2f-3 ERROR - illegal extension - .long serror |$2f-4 ERROR - illegal extension - .long serror |$2f-5 ERROR - illegal extension - .long serror |$2f-6 ERROR - illegal extension - .long serror |$2f-7 ERROR - illegal extension - - .long ssincos |$30-0 fsincos norm - .long ssincosz |$30-1 fsincos zero - .long ssincosi |$30-2 fsincos inf - .long ssincosnan |$30-3 fsincos nan - .long ssincosd |$30-4 fsincos denorm - .long serror |$30-5 fsincos ERROR - .long serror |$30-6 fsincos ERROR - .long serror |$30-7 fsincos ERROR - - .long ssincos |$31-0 fsincos norm - .long ssincosz |$31-1 fsincos zero - .long ssincosi |$31-2 fsincos inf - .long ssincosnan |$31-3 fsincos nan - .long ssincosd |$31-4 fsincos denorm - .long serror |$31-5 fsincos ERROR - .long serror |$31-6 fsincos ERROR - .long serror |$31-7 fsincos ERROR - - .long ssincos |$32-0 fsincos norm - .long ssincosz |$32-1 fsincos zero - .long ssincosi |$32-2 fsincos inf - .long ssincosnan |$32-3 fsincos nan - .long ssincosd |$32-4 fsincos denorm - .long serror |$32-5 fsincos ERROR - .long serror |$32-6 fsincos ERROR - .long serror |$32-7 fsincos ERROR - - .long ssincos |$33-0 fsincos norm - .long ssincosz |$33-1 fsincos zero - .long ssincosi |$33-2 fsincos inf - .long ssincosnan |$33-3 fsincos nan - .long ssincosd |$33-4 fsincos denorm - .long serror |$33-5 fsincos ERROR - .long serror |$33-6 fsincos ERROR - .long serror |$33-7 fsincos ERROR - - .long ssincos |$34-0 fsincos norm - .long ssincosz |$34-1 fsincos zero - .long ssincosi |$34-2 fsincos inf - .long ssincosnan |$34-3 fsincos nan - .long ssincosd |$34-4 fsincos denorm - .long serror |$34-5 fsincos ERROR - .long serror 
|$34-6 fsincos ERROR - .long serror |$34-7 fsincos ERROR - - .long ssincos |$35-0 fsincos norm - .long ssincosz |$35-1 fsincos zero - .long ssincosi |$35-2 fsincos inf - .long ssincosnan |$35-3 fsincos nan - .long ssincosd |$35-4 fsincos denorm - .long serror |$35-5 fsincos ERROR - .long serror |$35-6 fsincos ERROR - .long serror |$35-7 fsincos ERROR - - .long ssincos |$36-0 fsincos norm - .long ssincosz |$36-1 fsincos zero - .long ssincosi |$36-2 fsincos inf - .long ssincosnan |$36-3 fsincos nan - .long ssincosd |$36-4 fsincos denorm - .long serror |$36-5 fsincos ERROR - .long serror |$36-6 fsincos ERROR - .long serror |$36-7 fsincos ERROR - - .long ssincos |$37-0 fsincos norm - .long ssincosz |$37-1 fsincos zero - .long ssincosi |$37-2 fsincos inf - .long ssincosnan |$37-3 fsincos nan - .long ssincosd |$37-4 fsincos denorm - .long serror |$37-5 fsincos ERROR - .long serror |$37-6 fsincos ERROR - .long serror |$37-7 fsincos ERROR - - |end diff --git a/arch/m68k/fpsp040/util.S b/arch/m68k/fpsp040/util.S deleted file mode 100644 index 65b26fa88c60a4918368a2990396ecb1d19b44c6..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/util.S +++ /dev/null @@ -1,747 +0,0 @@ -| -| util.sa 3.7 7/29/91 -| -| This file contains routines used by other programs. -| -| ovf_res: used by overflow to force the correct -| result. ovf_r_k, ovf_r_x2, ovf_r_x3 are -| derivatives of this routine. -| get_fline: get user's opcode word -| g_dfmtou: returns the destination format. -| g_opcls: returns the opclass of the float instruction. -| g_rndpr: returns the rounding precision. -| reg_dest: write byte, word, or long data to Dn -| -| -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -|UTIL idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref mem_read - - .global g_dfmtou - .global g_opcls - .global g_rndpr - .global get_fline - .global reg_dest - -| -| Final result table for ovf_res. Note that the negative counterparts -| are unnecessary as ovf_res always returns the sign separately from -| the exponent. -| ;+inf -EXT_PINF: .long 0x7fff0000,0x00000000,0x00000000,0x00000000 -| ;largest +ext -EXT_PLRG: .long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 -| ;largest magnitude +sgl in ext -SGL_PLRG: .long 0x407e0000,0xffffff00,0x00000000,0x00000000 -| ;largest magnitude +dbl in ext -DBL_PLRG: .long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 -| ;largest -ext - -tblovfl: - .long EXT_RN - .long EXT_RZ - .long EXT_RM - .long EXT_RP - .long SGL_RN - .long SGL_RZ - .long SGL_RM - .long SGL_RP - .long DBL_RN - .long DBL_RZ - .long DBL_RM - .long DBL_RP - .long error - .long error - .long error - .long error - - -| -| ovf_r_k --- overflow result calculation -| -| This entry point is used by kernel_ex. -| -| This forces the destination precision to be extended -| -| Input: operand in ETEMP -| Output: a result is in ETEMP (internal extended format) -| - .global ovf_r_k -ovf_r_k: - lea ETEMP(%a6),%a0 |a0 points to source operand - bclrb #sign_bit,ETEMP_EX(%a6) - sne ETEMP_SGN(%a6) |convert to internal IEEE format - -| -| ovf_r_x2 --- overflow result calculation -| -| This entry point used by x_ovfl. 
(opclass 0 and 2) -| -| Input a0 points to an operand in the internal extended format -| Output a0 points to the result in the internal extended format -| -| This sets the round precision according to the user's FPCR unless the -| instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul, -| fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg. -| If the instruction is fsgldiv of fsglmul, the rounding precision must be -| extended. If the instruction is not fsgldiv or fsglmul but a force- -| precision instruction, the rounding precision is then set to the force -| precision. - - .global ovf_r_x2 -ovf_r_x2: - btstb #E3,E_BYTE(%a6) |check for nu exception - beql ovf_e1_exc |it is cu exception -ovf_e3_exc: - movew CMDREG3B(%a6),%d0 |get the command word - andiw #0x00000060,%d0 |clear all bits except 6 and 5 - cmpil #0x00000040,%d0 - beql ovff_sgl |force precision is single - cmpil #0x00000060,%d0 - beql ovff_dbl |force precision is double - movew CMDREG3B(%a6),%d0 |get the command word again - andil #0x7f,%d0 |clear all except operation - cmpil #0x33,%d0 - beql ovf_fsgl |fsglmul or fsgldiv - cmpil #0x30,%d0 - beql ovf_fsgl - bra ovf_fpcr |instruction is none of the above -| ;use FPCR -ovf_e1_exc: - movew CMDREG1B(%a6),%d0 |get command word - andil #0x00000044,%d0 |clear all bits except 6 and 2 - cmpil #0x00000040,%d0 - beql ovff_sgl |the instruction is force single - cmpil #0x00000044,%d0 - beql ovff_dbl |the instruction is force double - movew CMDREG1B(%a6),%d0 |again get the command word - andil #0x0000007f,%d0 |clear all except the op code - cmpil #0x00000027,%d0 - beql ovf_fsgl |fsglmul - cmpil #0x00000024,%d0 - beql ovf_fsgl |fsgldiv - bra ovf_fpcr |none of the above, use FPCR -| -| -| Inst is either fsgldiv or fsglmul. Force extended precision. -| -ovf_fsgl: - clrl %d0 - bra ovf_res - -ovff_sgl: - movel #0x00000001,%d0 |set single - bra ovf_res -ovff_dbl: - movel #0x00000002,%d0 |set double - bra ovf_res -| -| The precision is in the fpcr. -| -ovf_fpcr: - bfextu FPCR_MODE(%a6){#0:#2},%d0 |set round precision - bra ovf_res - -| -| -| ovf_r_x3 --- overflow result calculation -| -| This entry point used by x_ovfl. (opclass 3 only) -| -| Input a0 points to an operand in the internal extended format -| Output a0 points to the result in the internal extended format -| -| This sets the round precision according to the destination size. 
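The tblovfl table above and the EXT_*/DBL_*/SGL_* routines that follow encode the IEEE default-result rule for overflow: round-to-nearest delivers infinity, round-to-zero the largest finite number, and the two directed modes pick between them by sign. A C sketch of one row of that rule in double precision; the RN/RZ/RM/RP encoding follows FPCR_MODE.

#include <float.h>
#include <math.h>

enum rnd { RN, RZ, RM, RP };   /* rounding mode, as in FPCR_MODE */

double ovf_res_sketch(int negative, enum rnd mode)
{
    double mag = HUGE_VAL;                           /* +infinity      */

    switch (mode) {
    case RN: mag = HUGE_VAL; break;                  /* +/- infinity   */
    case RZ: mag = DBL_MAX;  break;                  /* largest finite */
    case RM: mag = negative ? HUGE_VAL : DBL_MAX; break;
    case RP: mag = negative ? DBL_MAX  : HUGE_VAL; break;
    }
    return negative ? -mag : mag;  /* sign applied last, cf. set_sign */
}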
-| - .global ovf_r_x3 -ovf_r_x3: - bsr g_dfmtou |get dest fmt in d0{1:0} -| ;for fmovout, the destination format -| ;is the rounding precision - -| -| ovf_res --- overflow result calculation -| -| Input: -| a0 points to operand in internal extended format -| Output: -| a0 points to result in internal extended format -| - .global ovf_res -ovf_res: - lsll #2,%d0 |move round precision to d0{3:2} - bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode - orl %d1,%d0 |index is fmt:mode in d0{3:0} - leal tblovfl,%a1 |load a1 with table address - movel %a1@(%d0:l:4),%a1 |use d0 as index to the table - jmp (%a1) |go to the correct routine -| -|case DEST_FMT = EXT -| -EXT_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra set_sign |now go set the sign -EXT_RZ: - leal EXT_PLRG,%a1 |answer is +/- large number - bra set_sign |now go set the sign -EXT_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs e_rm_pos -e_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bra end_ovfr -e_rm_pos: - leal EXT_PLRG,%a1 |answer is large positive number - bra end_ovfr -EXT_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs e_rp_pos -e_rp_neg: - leal EXT_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bra end_ovfr -e_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra end_ovfr -| -|case DEST_FMT = DBL -| -DBL_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra set_sign -DBL_RZ: - leal DBL_PLRG,%a1 |answer is +/- large number - bra set_sign |now go set the sign -DBL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs d_rm_pos -d_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bra end_ovfr |inf is same for all precisions (ext,dbl,sgl) -d_rm_pos: - leal DBL_PLRG,%a1 |answer is large positive number - bra end_ovfr -DBL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs d_rp_pos -d_rp_neg: - leal DBL_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bra end_ovfr -d_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bra end_ovfr -| -|case DEST_FMT = SGL -| -SGL_RN: - leal EXT_PINF,%a1 |answer is +/- infinity - bsetb #inf_bit,FPSR_CC(%a6) - bras set_sign -SGL_RZ: - leal SGL_PLRG,%a1 |answer is +/- large number - bras set_sign -SGL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs s_rm_pos -s_rm_neg: - leal EXT_PINF,%a1 |answer is negative infinity - orl #neginf_mask,USER_FPSR(%a6) - bras end_ovfr -s_rm_pos: - leal SGL_PLRG,%a1 |answer is large positive number - bras end_ovfr -SGL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs s_rp_pos -s_rp_neg: - leal SGL_PLRG,%a1 |answer is large negative number - bsetb #neg_bit,FPSR_CC(%a6) - bras end_ovfr -s_rp_pos: - leal EXT_PINF,%a1 |answer is positive infinity - bsetb #inf_bit,FPSR_CC(%a6) - bras end_ovfr - -set_sign: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs end_ovfr -neg_sign: - bsetb #neg_bit,FPSR_CC(%a6) - -end_ovfr: - movew LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign - movel LOCAL_HI(%a1),LOCAL_HI(%a0) - movel LOCAL_LO(%a1),LOCAL_LO(%a0) - rts - - -| -| ERROR -| -error: - rts -| -| get_fline --- get f-line opcode of interrupted instruction -| -| Returns opcode in the low word of d0. 
-| -get_fline: - movel USER_FPIAR(%a6),%a0 |opcode address - movel #0,-(%a7) |reserve a word on the stack - leal 2(%a7),%a1 |point to low word of temporary - movel #2,%d0 |count - bsrl mem_read - movel (%a7)+,%d0 - rts -| -| g_rndpr --- put rounding precision in d0{1:0} -| -| valid return codes are: -| 00 - extended -| 01 - single -| 10 - double -| -| begin -| get rounding precision (cmdreg3b{6:5}) -| begin -| case opclass = 011 (move out) -| get destination format - this is the also the rounding precision -| -| case opclass = 0x0 -| if E3 -| *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL -| *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL -| case RndPr(from cmdreg3b{6:5} = 00 | 01 -| use precision from FPCR{7:6} -| case 00 then RND_PREC = EXT -| case 01 then RND_PREC = SGL -| case 10 then RND_PREC = DBL -| else E1 -| use precision in FPCR{7:6} -| case 00 then RND_PREC = EXT -| case 01 then RND_PREC = SGL -| case 10 then RND_PREC = DBL -| end -| -g_rndpr: - bsr g_opcls |get opclass in d0{2:0} - cmpw #0x0003,%d0 |check for opclass 011 - bnes op_0x0 - -| -| For move out instructions (opclass 011) the destination format -| is the same as the rounding precision. Pass results from g_dfmtou. -| - bsr g_dfmtou - rts -op_0x0: - btstb #E3,E_BYTE(%a6) - beql unf_e1_exc |branch to e1 underflow -unf_e3_exc: - movel CMDREG3B(%a6),%d0 |rounding precision in d0{10:9} - bfextu %d0{#9:#2},%d0 |move the rounding prec bits to d0{1:0} - cmpil #0x2,%d0 - beql unff_sgl |force precision is single - cmpil #0x3,%d0 |force precision is double - beql unff_dbl - movew CMDREG3B(%a6),%d0 |get the command word again - andil #0x7f,%d0 |clear all except operation - cmpil #0x33,%d0 - beql unf_fsgl |fsglmul or fsgldiv - cmpil #0x30,%d0 - beql unf_fsgl |fsgldiv or fsglmul - bra unf_fpcr -unf_e1_exc: - movel CMDREG1B(%a6),%d0 |get 32 bits off the stack, 1st 16 bits -| ;are the command word - andil #0x00440000,%d0 |clear all bits except bits 6 and 2 - cmpil #0x00400000,%d0 - beql unff_sgl |force single - cmpil #0x00440000,%d0 |force double - beql unff_dbl - movel CMDREG1B(%a6),%d0 |get the command word again - andil #0x007f0000,%d0 |clear all bits except the operation - cmpil #0x00270000,%d0 - beql unf_fsgl |fsglmul - cmpil #0x00240000,%d0 - beql unf_fsgl |fsgldiv - bra unf_fpcr - -| -| Convert to return format. The values from cmdreg3b and the return -| values are: -| cmdreg3b return precision -| -------- ------ --------- -| 00,01 0 ext -| 10 1 sgl -| 11 2 dbl -| Force single -| -unff_sgl: - movel #1,%d0 |return 1 - rts -| -| Force double -| -unff_dbl: - movel #2,%d0 |return 2 - rts -| -| Force extended -| -unf_fsgl: - movel #0,%d0 - rts -| -| Get rounding precision set in FPCR{7:6}. -| -unf_fpcr: - movel USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6} - bfextu %d0{#24:#2},%d0 |move the rounding prec bits to d0{1:0} - rts -| -| g_opcls --- put opclass in d0{2:0} -| -g_opcls: - btstb #E3,E_BYTE(%a6) - beqs opc_1b |if set, go to cmdreg1b -opc_3b: - clrl %d0 |if E3, only opclass 0x0 is possible - rts -opc_1b: - movel CMDREG1B(%a6),%d0 - bfextu %d0{#0:#3},%d0 |shift opclass bits d0{31:29} to d0{2:0} - rts -| -| g_dfmtou --- put destination format in d0{1:0} -| -| If E1, the format is from cmdreg1b{12:10} -| If E3, the format is extended. -| -| Dest. Fmt. 
-| extended 010 -> 00 -| single 001 -> 01 -| double 101 -> 10 -| -g_dfmtou: - btstb #E3,E_BYTE(%a6) - beqs op011 - clrl %d0 |if E1, size is always ext - rts -op011: - movel CMDREG1B(%a6),%d0 - bfextu %d0{#3:#3},%d0 |dest fmt from cmdreg1b{12:10} - cmpb #1,%d0 |check for single - bnes not_sgl - movel #1,%d0 - rts -not_sgl: - cmpb #5,%d0 |check for double - bnes not_dbl - movel #2,%d0 - rts -not_dbl: - clrl %d0 |must be extended - rts - -| -| -| Final result table for unf_sub. Note that the negative counterparts -| are unnecessary as unf_sub always returns the sign separately from -| the exponent. -| ;+zero -EXT_PZRO: .long 0x00000000,0x00000000,0x00000000,0x00000000 -| ;+zero -SGL_PZRO: .long 0x3f810000,0x00000000,0x00000000,0x00000000 -| ;+zero -DBL_PZRO: .long 0x3c010000,0x00000000,0x00000000,0x00000000 -| ;smallest +ext denorm -EXT_PSML: .long 0x00000000,0x00000000,0x00000001,0x00000000 -| ;smallest +sgl denorm -SGL_PSML: .long 0x3f810000,0x00000100,0x00000000,0x00000000 -| ;smallest +dbl denorm -DBL_PSML: .long 0x3c010000,0x00000000,0x00000800,0x00000000 -| -| UNF_SUB --- underflow result calculation -| -| Input: -| d0 contains round precision -| a0 points to input operand in the internal extended format -| -| Output: -| a0 points to correct internal extended precision result. -| - -tblunf: - .long uEXT_RN - .long uEXT_RZ - .long uEXT_RM - .long uEXT_RP - .long uSGL_RN - .long uSGL_RZ - .long uSGL_RM - .long uSGL_RP - .long uDBL_RN - .long uDBL_RZ - .long uDBL_RM - .long uDBL_RP - .long uDBL_RN - .long uDBL_RZ - .long uDBL_RM - .long uDBL_RP - - .global unf_sub -unf_sub: - lsll #2,%d0 |move round precision to d0{3:2} - bfextu FPCR_MODE(%a6){#2:#2},%d1 |set round mode - orl %d1,%d0 |index is fmt:mode in d0{3:0} - leal tblunf,%a1 |load a1 with table address - movel %a1@(%d0:l:4),%a1 |use d0 as index to the table - jmp (%a1) |go to the correct routine -| -|case DEST_FMT = EXT -| -uEXT_RN: - leal EXT_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uEXT_RZ: - leal EXT_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uEXT_RM: - tstb LOCAL_SGN(%a0) |if negative underflow - beqs ue_rm_pos -ue_rm_neg: - leal EXT_PSML,%a1 |answer is negative smallest denorm - bsetb #neg_bit,FPSR_CC(%a6) - bra end_unfr -ue_rm_pos: - leal EXT_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bra end_unfr -uEXT_RP: - tstb LOCAL_SGN(%a0) |if negative underflow - beqs ue_rp_pos -ue_rp_neg: - leal EXT_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bra end_unfr -ue_rp_pos: - leal EXT_PSML,%a1 |answer is positive smallest denorm - bra end_unfr -| -|case DEST_FMT = DBL -| -uDBL_RN: - leal DBL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign -uDBL_RZ: - leal DBL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bra uset_sign |now go set the sign -uDBL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs ud_rm_pos -ud_rm_neg: - leal DBL_PSML,%a1 |answer is smallest denormalized negative - bsetb #neg_bit,FPSR_CC(%a6) - bra end_unfr -ud_rm_pos: - leal DBL_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bra end_unfr -uDBL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs ud_rp_pos -ud_rp_neg: - leal DBL_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bra end_unfr -ud_rp_pos: - leal DBL_PSML,%a1 |answer is smallest denormalized negative - bra end_unfr -| -|case DEST_FMT = SGL -| -uSGL_RN: - leal SGL_PZRO,%a1 |answer is +/- 
zero - bsetb #z_bit,FPSR_CC(%a6) - bras uset_sign -uSGL_RZ: - leal SGL_PZRO,%a1 |answer is +/- zero - bsetb #z_bit,FPSR_CC(%a6) - bras uset_sign -uSGL_RM: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs us_rm_pos -us_rm_neg: - leal SGL_PSML,%a1 |answer is smallest denormalized negative - bsetb #neg_bit,FPSR_CC(%a6) - bras end_unfr -us_rm_pos: - leal SGL_PZRO,%a1 |answer is positive zero - bsetb #z_bit,FPSR_CC(%a6) - bras end_unfr -uSGL_RP: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs us_rp_pos -us_rp_neg: - leal SGL_PZRO,%a1 |answer is negative zero - oril #negz_mask,USER_FPSR(%a6) - bras end_unfr -us_rp_pos: - leal SGL_PSML,%a1 |answer is smallest denormalized positive - bras end_unfr - -uset_sign: - tstb LOCAL_SGN(%a0) |if negative overflow - beqs end_unfr -uneg_sign: - bsetb #neg_bit,FPSR_CC(%a6) - -end_unfr: - movew LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign - movel LOCAL_HI(%a1),LOCAL_HI(%a0) - movel LOCAL_LO(%a1),LOCAL_LO(%a0) - rts -| -| reg_dest --- write byte, word, or long data to Dn -| -| -| Input: -| L_SCR1: Data -| d1: data size and dest register number formatted as: -| -| 32 5 4 3 2 1 0 -| ----------------------------------------------- -| | 0 | Size | Dest Reg # | -| ----------------------------------------------- -| -| Size is: -| 0 - Byte -| 1 - Word -| 2 - Long/Single -| -pregdst: - .long byte_d0 - .long byte_d1 - .long byte_d2 - .long byte_d3 - .long byte_d4 - .long byte_d5 - .long byte_d6 - .long byte_d7 - .long word_d0 - .long word_d1 - .long word_d2 - .long word_d3 - .long word_d4 - .long word_d5 - .long word_d6 - .long word_d7 - .long long_d0 - .long long_d1 - .long long_d2 - .long long_d3 - .long long_d4 - .long long_d5 - .long long_d6 - .long long_d7 - -reg_dest: - leal pregdst,%a0 - movel %a0@(%d1:l:4),%a0 - jmp (%a0) - -byte_d0: - moveb L_SCR1(%a6),USER_D0+3(%a6) - rts -byte_d1: - moveb L_SCR1(%a6),USER_D1+3(%a6) - rts -byte_d2: - moveb L_SCR1(%a6),%d2 - rts -byte_d3: - moveb L_SCR1(%a6),%d3 - rts -byte_d4: - moveb L_SCR1(%a6),%d4 - rts -byte_d5: - moveb L_SCR1(%a6),%d5 - rts -byte_d6: - moveb L_SCR1(%a6),%d6 - rts -byte_d7: - moveb L_SCR1(%a6),%d7 - rts -word_d0: - movew L_SCR1(%a6),USER_D0+2(%a6) - rts -word_d1: - movew L_SCR1(%a6),USER_D1+2(%a6) - rts -word_d2: - movew L_SCR1(%a6),%d2 - rts -word_d3: - movew L_SCR1(%a6),%d3 - rts -word_d4: - movew L_SCR1(%a6),%d4 - rts -word_d5: - movew L_SCR1(%a6),%d5 - rts -word_d6: - movew L_SCR1(%a6),%d6 - rts -word_d7: - movew L_SCR1(%a6),%d7 - rts -long_d0: - movel L_SCR1(%a6),USER_D0(%a6) - rts -long_d1: - movel L_SCR1(%a6),USER_D1(%a6) - rts -long_d2: - movel L_SCR1(%a6),%d2 - rts -long_d3: - movel L_SCR1(%a6),%d3 - rts -long_d4: - movel L_SCR1(%a6),%d4 - rts -long_d5: - movel L_SCR1(%a6),%d5 - rts -long_d6: - movel L_SCR1(%a6),%d6 - rts -long_d7: - movel L_SCR1(%a6),%d7 - rts - |end diff --git a/arch/m68k/fpsp040/x_bsun.S b/arch/m68k/fpsp040/x_bsun.S deleted file mode 100644 index d5a576bfac79447b12d7117119c854fb5757a39d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_bsun.S +++ /dev/null @@ -1,46 +0,0 @@ -| -| x_bsun.sa 3.3 7/1/91 -| -| fpsp_bsun --- FPSP handler for branch/set on unordered exception -| -| Copy the PC to FPIAR to maintain 881/882 compatibility -| -| The real_bsun handler will need to perform further corrective -| measures as outlined in the 040 User's Manual on pages -| 9-41f, section 9.8.3. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_BSUN: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_bsun - - .global fpsp_bsun -fpsp_bsun: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| - movel EXC_PC(%a6),USER_FPIAR(%a6) -| - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_bsun -| - |end diff --git a/arch/m68k/fpsp040/x_fline.S b/arch/m68k/fpsp040/x_fline.S deleted file mode 100644 index 264e126d1db7bc19ffd2d6151b9abb6f5f30e7c2..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_fline.S +++ /dev/null @@ -1,103 +0,0 @@ -| -| x_fline.sa 3.3 1/10/91 -| -| fpsp_fline --- FPSP handler for fline exception -| -| First determine if the exception is one of the unimplemented -| floating point instructions. If so, let fpsp_unimp handle it. -| Next, determine if the instruction is an fmovecr with a non-zero -| <ea> field. If so, handle here and return. Otherwise, it -| must be a real F-line exception. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_FLINE: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref real_fline - |xref fpsp_unimp - |xref uni_2 - |xref mem_read - |xref fpsp_fmt_error - - .global fpsp_fline -fpsp_fline: -| -| check for unimplemented vector first. Use EXC_VEC-4 because -| the equate is valid only after a 'link a6' has pushed one more -| long onto the stack.
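The triage fpsp_fline performs can be summarized in C. Everything here is a hedged sketch: the enum, function, and the boolean argument standing in for the UNIMP_VEC stack-frame comparison are invented, while the two bit-field extractions mirror the bfextu instructions in the code that follows.

    enum fline_kind { FLINE_UNIMP, FLINE_FMOVECR, FLINE_REAL };

    /* inst holds the F-line word in its upper 16 bits and the command
     * word in its lower 16 bits, as assembled by the mem_read call. */
    static enum fline_kind classify_fline(int on_unimp_vector,
                                          unsigned long inst)
    {
        if (on_unimp_vector)
            return FLINE_UNIMP;              /* hand off to fpsp_unimp */
        if (((inst >> 25) & 0x7) == 1 &&     /* coprocessor id must be 1 */
            ((inst >> 10) & 0x3f) == 0x17)   /* FMOVECR bit pattern */
            return FLINE_FMOVECR;            /* emulated in this handler */
        return FLINE_REAL;                   /* genuine F-line illegal */
    }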
-| - cmpw #UNIMP_VEC,EXC_VEC-4(%a7) - beql fpsp_unimp - -| -| fmovecr with non-zero handling here -| - subl #4,%a7 |4 accounts for 2-word difference -| ;between six word frame (unimp) and -| ;four word frame - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - moveal EXC_PC+4(%a6),%a0 |get address of fline instruction - leal L_SCR1(%a6),%a1 |use L_SCR1 as scratch - movel #4,%d0 - addl #4,%a6 |to offset the sub.l #4,a7 above so that -| ;a6 can point correctly to the stack frame -| ;before branching to mem_read - bsrl mem_read - subl #4,%a6 - movel L_SCR1(%a6),%d0 |d0 contains the fline and command word - bfextu %d0{#4:#3},%d1 |extract coprocessor id - cmpib #1,%d1 |check if cpid=1 - bne not_mvcr |exit if not - bfextu %d0{#16:#6},%d1 - cmpib #0x17,%d1 |check if it is an FMOVECR encoding - bne not_mvcr -| ;if an FMOVECR instruction, fix stack -| ;and go to FPSP_UNIMP -fix_stack: - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev - subl #UNIMP_40_SIZE-4,%a7 |emulate an orig fsave - moveb #VER_40,(%a7) - moveb #UNIMP_40_SIZE-4,1(%a7) - clrw 2(%a7) - bras fix_con -ck_rev: - cmpib #VER_41,(%a7) |test for rev unimp frame - bnel fpsp_fmt_error |if not $40 or $41, exit with error - subl #UNIMP_41_SIZE-4,%a7 |emulate a rev fsave - moveb #VER_41,(%a7) - moveb #UNIMP_41_SIZE-4,1(%a7) - clrw 2(%a7) -fix_con: - movew EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position - movel EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position - fmovel EXC_PC(%a6),%FPIAR |point FPIAR to fline inst - movel #4,%d1 - addl %d1,EXC_PC(%a6) |increment stacked pc value to next inst - movew #0x202c,EXC_VEC(%a6) |reformat vector to unimp - clrl EXC_EA(%a6) |clear the EXC_EA field - movew %d0,CMDREG1B(%a6) |move the lower word into CMDREG1B - clrl E_BYTE(%a6) - bsetb #UFLAG,T_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers - bral uni_2 - -not_mvcr: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers - frestore (%a7)+ - unlk %a6 - addl #4,%a7 - bral real_fline - - |end diff --git a/arch/m68k/fpsp040/x_operr.S b/arch/m68k/fpsp040/x_operr.S deleted file mode 100644 index e2c371c3a45dbf084f79076a9058fe6a87dbd087..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_operr.S +++ /dev/null @@ -1,355 +0,0 @@ -| -| x_operr.sa 3.5 7/1/91 -| -| fpsp_operr --- FPSP handler for operand error exception -| -| See 68040 User's Manual pp. 9-44f -| -| Note 1: For trap disabled 040 does the following: -| If the dest is a fp reg, then an extended precision non_signaling -| NAN is stored in the dest reg. If the dest format is b, w, or l and -| the source op is a NAN, then garbage is stored as the result (actually -| the upper 32 bits of the mantissa are sent to the integer unit). If -| the dest format is integer (b, w, l) and the operr is caused by -| integer overflow, or the source op is inf, then the result stored is -| garbage. -| There are three cases in which operr is incorrectly signaled on the -| 040. This occurs for move_out of format b, w, or l for the largest -| negative integer (-2^7 for b, -2^15 for w, -2^31 for l). 
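In other words, the three spurious cases are exactly the most negative representable integers for each destination size, values that are perfectly valid results. A short C restatement (the function name is illustrative):

    #include <stdint.h>

    /* The 040 raises operr on a move-out of these valid values. */
    static int operr_wrongly_signalled(int size_bytes, int64_t v)
    {
        switch (size_bytes) {
        case 1: return v == INT8_MIN;    /* -2^7,  byte */
        case 2: return v == INT16_MIN;   /* -2^15, word */
        case 4: return v == INT32_MIN;   /* -2^31, long */
        }
        return 0;
    }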
-| -| On opclass = 011 fmove.(b,w,l) that causes a conversion -| overflow -> OPERR, the exponent in wbte (and fpte) is: -| byte 56 - (62 - exp) -| word 48 - (62 - exp) -| long 32 - (62 - exp) -| -| where exp = (true exp) - 1 -| -| So, wbtemp and fptemp will contain the following on erroneously -| signalled operr: -| fpts = 1 -| fpte = $4000 (15 bit externally) -| byte fptm = $ffffffff ffffff80 -| word fptm = $ffffffff ffff8000 -| long fptm = $ffffffff 80000000 -| -| Note 2: For trap enabled 040 does the following: -| If the inst is move_out, then same as Note 1. -| If the inst is not move_out, the dest is not modified. -| The exceptional operand is not defined for integer overflow -| during a move_out. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_OPERR: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref mem_write - |xref real_operr - |xref real_inex - |xref get_fline - |xref fpsp_done - |xref reg_dest - - .global fpsp_operr -fpsp_operr: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| Check if this is an opclass 3 instruction. -| If so, fall through, else branch to operr_end -| - btstb #TFLAG,T_BYTE(%a6) - beqs operr_end - -| -| If the destination size is B,W,or L, the operr must be -| handled here. -| - movel CMDREG1B(%a6),%d0 - bfextu %d0{#3:#3},%d0 |0=long, 4=word, 6=byte - cmpib #0,%d0 |determine size; check long - beq operr_long - cmpib #4,%d0 |check word - beq operr_word - cmpib #6,%d0 |check byte - beq operr_byte - -| -| The size is not B,W,or L, so the operr is handled by the -| kernel handler. Set the operr bits and clean up, leaving -| only the integer exception frame on the stack, and the -| fpu in the original exceptional state. 
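The size dispatch just shown maps the three-bit destination-format field onto byte counts; a compact C equivalent (the function name is invented, and the shift mirrors the bfextu extraction above):

    /* Destination size for an opclass-3 operr, from the format field in
     * the stacked CMDREG1B longword (0 = long, 4 = word, 6 = byte). */
    static int operr_dest_size(unsigned long cmdreg1b)
    {
        switch ((cmdreg1b >> 26) & 0x7) {
        case 0: return 4;     /* long */
        case 4: return 2;     /* word */
        case 6: return 1;     /* byte */
        default: return 0;    /* s/d/x: left to the kernel handler */
        }
    }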
-| -operr_end: - bsetb #operr_bit,FPSR_EXCEPT(%a6) - bsetb #aiop_bit,FPSR_AEXCEPT(%a6) - - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_operr - -operr_long: - moveql #4,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beq operr_nan - cmpil #0x80000000,FPTEMP_LO(%a6) |test if ls lword is special - bnes chklerr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chklerr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chklerr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes chklerr2 -fixlong: - movel FPTEMP_LO(%a6),%d0 - bsr operr_store - bra not_enabled -chklerr2: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x4000,%d0 - bcc store_max |exponent out of range - - movel FPTEMP_LO(%a6),%d0 - andl #0x7FFF0000,%d0 |look for all 1's on bits 30-16 - cmpl #0x7FFF0000,%d0 - beqs fixlong - - tstl FPTEMP_LO(%a6) - bpls chklepos - cmpl #0xFFFFFFFF,FPTEMP_HI(%a6) - beqs fixlong - bra store_max -chklepos: - tstl FPTEMP_HI(%a6) - beqs fixlong - bra store_max - -operr_word: - moveql #2,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beq operr_nan - cmpil #0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special - bnes chkwerr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chkwerr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chkwerr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes store_max - movel FPTEMP_LO(%a6),%d0 - swap %d0 - bsr operr_store - bra not_enabled - -operr_byte: - moveql #1,%d1 |write size to d1 - moveb STAG(%a6),%d0 |test stag for nan - andib #0xe0,%d0 |clr all but tag - cmpib #0x60,%d0 |check for nan - beqs operr_nan - cmpil #0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special - bnes chkberr |if not equal, check for incorrect operr - bsr check_upper |check if exp and ms mant are special - tstl %d0 - bnes chkberr |if d0 is true, check for incorrect operr - movel #0x80000000,%d0 |store special case result - bsr operr_store - bra not_enabled |clean and exit -| -| CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE -| -chkberr: - movew FPTEMP_EX(%a6),%d0 - andw #0x7FFF,%d0 |ignore sign bit - cmpw #0x3FFE,%d0 |this is the only possible exponent value - bnes store_max - movel FPTEMP_LO(%a6),%d0 - asll #8,%d0 - swap %d0 - bsr operr_store - bra not_enabled - -| -| This operr condition is not of the special case. Set operr -| and aiop and write the portion of the nan to memory for the -| given size. 
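For the word and byte paths above, the spurious-operr test is narrow: once the sign is masked off, the only biased exponent that can produce the case is 0x3FFE, and the value to store is recovered from the low end of FPTEMP's least significant longword. A C sketch of the word variant, under those assumptions (names invented):

    #include <stdint.h>

    /* chkwerr in C: returns the corrected result through *out when the
     * operr was bogus, 0 when the maximum value applies instead. */
    static int recover_word_operr(uint16_t fptemp_ex, uint32_t fptemp_lo,
                                  uint16_t *out)
    {
        if ((fptemp_ex & 0x7fff) != 0x3ffe)  /* only possible exponent */
            return 0;                        /* genuine: store_max */
        *out = (uint16_t)fptemp_lo;          /* low word; the swap in
                                                chkwerr moves it into the
                                                position mem_write emits */
        return 1;
    }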
-| -operr_nan: - orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop - - movel ETEMP_HI(%a6),%d0 |output will be from upper 32 bits - bsr operr_store - bra end_operr -| -| Store_max loads the max pos or negative for the size, sets -| the operr and aiop bits, and clears inex and ainex, incorrectly -| set by the 040. -| -store_max: - orl #opaop_mask,USER_FPSR(%a6) |set operr & aiop - bclrb #inex2_bit,FPSR_EXCEPT(%a6) - bclrb #ainex_bit,FPSR_AEXCEPT(%a6) - fmovel #0,%FPSR - - tstw FPTEMP_EX(%a6) |check sign - blts load_neg - movel #0x7fffffff,%d0 - bsr operr_store - bra end_operr -load_neg: - movel #0x80000000,%d0 - bsr operr_store - bra end_operr - -| -| This routine stores the data in d0, for the given size in d1, -| to memory or data register as required. A read of the fline -| is required to determine the destination. -| -operr_store: - movel %d0,L_SCR1(%a6) |move write data to L_SCR1 - movel %d1,-(%a7) |save register size - bsrl get_fline |fline returned in d0 - movel (%a7)+,%d1 - bftst %d0{#26:#3} |if mode is zero, dest is Dn - bnes dest_mem -| -| Destination is Dn. Get register number from d0. Data is on -| the stack at (a7). D1 has size: 1=byte,2=word,4=long/single -| - andil #7,%d0 |isolate register number - cmpil #4,%d1 - beqs op_long |the most frequent case - cmpil #2,%d1 - bnes op_con - orl #8,%d0 - bras op_con -op_long: - orl #0x10,%d0 -op_con: - movel %d0,%d1 |format size:reg for reg_dest - bral reg_dest |call to reg_dest returns to caller -| ;of operr_store -| -| Destination is memory. Get from integer exception frame -| and call mem_write. -| -dest_mem: - leal L_SCR1(%a6),%a0 |put ptr to write data in a0 - movel EXC_EA(%a6),%a1 |put user destination address in a1 - movel %d1,%d0 |put size in d0 - bsrl mem_write - rts -| -| Check the exponent for $c000 and the upper 32 bits of the -| mantissa for $ffffffff. If both are true, return d0 clr -| and store the lower n bits of the least lword of FPTEMP -| to d0 for write out. If not, it is a real operr, and set d0. -| -check_upper: - cmpil #0xffffffff,FPTEMP_HI(%a6) |check if first byte is all 1's - bnes true_operr |if not all 1's then was true operr - cmpiw #0xc000,FPTEMP_EX(%a6) |check if incorrectly signalled - beqs not_true_operr |branch if not true operr - cmpiw #0xbfff,FPTEMP_EX(%a6) |check if incorrectly signalled - beqs not_true_operr |branch if not true operr -true_operr: - movel #1,%d0 |signal real operr - rts -not_true_operr: - clrl %d0 |signal no real operr - rts - -| -| End_operr tests for operr enabled. If not, it cleans up the stack -| and does an rte. If enabled, it cleans up the stack and branches -| to the kernel operr handler with only the integer exception -| frame on the stack and the fpu in the original exceptional state -| with correct data written to the destination. -| -end_operr: - btstb #operr_bit,FPCR_ENABLE(%a6) - beqs not_enabled -enabled: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_operr - -not_enabled: -| -| It is possible to have either inex2 or inex1 exceptions with the -| operr. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beq operr_exit -| -| Inexact enabled and reported, and we must take an inexact exception. 
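The pending-inexact test described above is a two-byte AND: both the enable bits and the status bits for inex1/inex2 (mask 0x3) must be set before the handler diverts to real_inex. As a one-line C sketch (the function name is invented):

    /* ck_inex's test: take the inexact exception only when an inex1 or
     * inex2 status bit coincides with its enable bit. */
    static int must_take_inexact(unsigned char fpcr_enable,
                                 unsigned char fpsr_except)
    {
        return (fpcr_enable & fpsr_except & 0x3) != 0;
    }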
-| -take_inex: - moveb #INEX_VEC,EXC_VEC+1(%a6) - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex -| -| Since operr is only an E1 exception, there is no need to frestore -| any state back to the fpu. -| -operr_exit: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done - - |end diff --git a/arch/m68k/fpsp040/x_ovfl.S b/arch/m68k/fpsp040/x_ovfl.S deleted file mode 100644 index 6fe4989ee31f7c5644f521dc42213fa870038551..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_ovfl.S +++ /dev/null @@ -1,185 +0,0 @@ -| -| x_ovfl.sa 3.5 7/1/91 -| -| fpsp_ovfl --- FPSP handler for overflow exception -| -| Overflow occurs when a floating-point intermediate result is -| too large to be represented in a floating-point data register, -| or when storing to memory, the contents of a floating-point -| data register are too large to be represented in the -| destination format. -| -| Trap disabled results -| -| If the instruction is move_out, then garbage is stored in the -| destination. If the instruction is not move_out, then the -| destination is not affected. For 68881 compatibility, the -| following values should be stored at the destination, based -| on the current rounding mode: -| -| RN Infinity with the sign of the intermediate result. -| RZ Largest magnitude number, with the sign of the -| intermediate result. -| RM For pos overflow, the largest pos number. For neg overflow, -| -infinity -| RP For pos overflow, +infinity. For neg overflow, the largest -| neg number -| -| Trap enabled results -| All trap disabled code applies. In addition the exceptional -| operand needs to be made available to the users exception handler -| with a bias of $6000 subtracted from the exponent. -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_OVFL: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref ovf_r_x2 - |xref ovf_r_x3 - |xref store - |xref real_ovfl - |xref real_inex - |xref fpsp_done - |xref g_opcls - |xref b1238_fix - - .global fpsp_ovfl -fpsp_ovfl: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| The 040 doesn't set the AINEX bit in the FPSR, the following -| line temporarily rectifies this error. -| - bsetb #ainex_bit,FPSR_AEXCEPT(%a6) -| - bsrl ovf_adj |denormalize, round & store interm op -| -| if overflow traps not enabled check for inexact exception -| - btstb #ovfl_bit,FPCR_ENABLE(%a6) - beqs ck_inex -| - btstb #E3,E_BYTE(%a6) - beqs no_e3_1 - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_1: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_ovfl -| -| It is possible to have either inex2 or inex1 exceptions with the -| ovfl. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. 
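Returning to the trap-disabled results enumerated in the x_ovfl header above: for every destination precision they reduce to a choice between infinity and the largest finite number, as a function of rounding mode and result sign, which is exactly what the ovf_res tables encode. Sketched in C (the function name is invented; the mode encoding is the FPCR's):

    /* Trap-disabled overflow default, per rounding mode (FPCR encoding:
     * 0 = RN, 1 = RZ, 2 = RM, 3 = RP).  Returns 1 when the stored result
     * is infinity, 0 when it is the largest-magnitude finite number. */
    static int ovf_default_is_inf(int rnd_mode, int negative)
    {
        switch (rnd_mode) {
        case 0: return 1;           /* RN: infinity, sign of result    */
        case 1: return 0;           /* RZ: largest magnitude, signed   */
        case 2: return negative;    /* RM: -inf if negative, else +max */
        case 3: return !negative;   /* RP: +inf if positive, else -max */
        }
        return 1;                   /* not reached */
    }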
-| -ck_inex: -| move.b FPCR_ENABLE(%a6),%d0 -| and.b FPSR_EXCEPT(%a6),%d0 -| andi.b #$3,%d0 - btstb #inex2_bit,FPCR_ENABLE(%a6) - beqs ovfl_exit -| -| Inexact enabled and reported, and we must take an inexact exception. -| -take_inex: - btstb #E3,E_BYTE(%a6) - beqs no_e3_2 - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_2: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex - -ovfl_exit: - bclrb #E3,E_BYTE(%a6) |test and clear E3 bit - beqs e1_set -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done -e1_set: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done - -| -| ovf_adj -| -ovf_adj: -| -| Have a0 point to the correct operand. -| - btstb #E3,E_BYTE(%a6) |test E3 bit - beqs ovf_e1 - - lea WBTEMP(%a6),%a0 - bras ovf_com -ovf_e1: - lea ETEMP(%a6),%a0 - -ovf_com: - bclrb #sign_bit,LOCAL_EX(%a0) - sne LOCAL_SGN(%a0) - - bsrl g_opcls |returns opclass in d0 - cmpiw #3,%d0 |check for opclass3 - bnes not_opc011 - -| -| FPSR_CC is saved and restored because ovf_r_x3 affects it. The -| CCs are defined to be 'not affected' for the opclass3 instruction. -| - moveb FPSR_CC(%a6),L_SCR1(%a6) - bsrl ovf_r_x3 |returns a0 pointing to result - moveb L_SCR1(%a6),FPSR_CC(%a6) - bral store |stores to memory or register - -not_opc011: - bsrl ovf_r_x2 |returns a0 pointing to result - bral store |stores to memory or register - - |end diff --git a/arch/m68k/fpsp040/x_snan.S b/arch/m68k/fpsp040/x_snan.S deleted file mode 100644 index 4ed7664163781bccaebf477566550cb6404d776d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_snan.S +++ /dev/null @@ -1,276 +0,0 @@ -| -| x_snan.sa 3.3 7/1/91 -| -| fpsp_snan --- FPSP handler for signalling NAN exception -| -| SNAN for float -> integer conversions (integer conversion of -| an SNAN) is a non-maskable run-time exception. -| -| For trap disabled the 040 does the following: -| If the dest data format is s, d, or x, then the SNAN bit in the NAN -| is set to one and the resulting non-signaling NAN (truncated if -| necessary) is transferred to the dest. If the dest format is b, w, -| or l, then garbage is written to the dest (actually the upper 32 bits -| of the mantissa are sent to the integer unit). -| -| For trap enabled the 040 does the following: -| If the inst is move_out, then the results are the same as for trap -| disabled with the exception posted. If the instruction is not move_ -| out, the dest. is not modified, and the exception is posted. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. 
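The x_snan header above says the SNAN bit in the NaN is set to one before the (possibly truncated) non-signalling NaN reaches the destination. In the sto_long/sto_word/sto_byte paths of the handler that follows, that amounts to a single bit-set on the upper mantissa longword; sketched in C (the function name is invented):

    #include <stdint.h>

    /* Quiet a signalling NaN the way the move-out paths do: set bit 30
     * of ETEMP's upper mantissa longword (bit 62 of the full mantissa),
     * matching the bsetl #30,%d1 in the handler below. */
    static uint32_t quiet_snan_upper(uint32_t etemp_hi)
    {
        return etemp_hi | (1UL << 30);
    }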
- -X_SNAN: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_fline - |xref mem_write - |xref real_snan - |xref real_inex - |xref fpsp_done - |xref reg_dest - - .global fpsp_snan -fpsp_snan: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| -| Check if trap enabled -| - btstb #snan_bit,FPCR_ENABLE(%a6) - bnes ena |If enabled, then branch - - bsrl move_out |else SNAN disabled -| -| It is possible to have an inex1 exception with the -| snan. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beq end_snan -| -| Inexact enabled and reported, and we must take an inexact exception. -| -take_inex: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex -| -| SNAN is enabled. Check if inst is move_out. -| Make any corrections to the 040 output as necessary. -| -ena: - btstb #5,CMDREG1B(%a6) |if set, inst is move out - beq not_out - - bsrl move_out - -report_snan: - moveb (%a7),VER_TMP(%a6) - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev - moveql #13,%d0 |need to zero 14 lwords - bras rep_con -ck_rev: - moveql #11,%d0 |need to zero 12 lwords -rep_con: - clrl (%a7) -loop1: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop1 - moveb VER_TMP(%a6),(%a7) |format a busy frame - moveb #BUSY_SIZE-4,1(%a7) - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_snan -| -| Exit snan handler by expanding the unimp frame into a busy frame -| -end_snan: - bclrb #E1,E_BYTE(%a6) - - moveb (%a7),VER_TMP(%a6) - cmpib #VER_40,(%a7) |test for orig unimp frame - bnes ck_rev2 - moveql #13,%d0 |need to zero 14 lwords - bras rep_con2 -ck_rev2: - moveql #11,%d0 |need to zero 12 lwords -rep_con2: - clrl (%a7) -loop2: - clrl -(%a7) |clear and dec a7 - dbra %d0,loop2 - moveb VER_TMP(%a6),(%a7) |format a busy frame - moveb #BUSY_SIZE-4,1(%a7) |write busy size - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done - -| -| Move_out -| -move_out: - movel EXC_EA(%a6),%a0 |get from exc frame - - bfextu CMDREG1B(%a6){#3:#3},%d0 |move rx field to d0{2:0} - cmpil #0,%d0 |check for long - beqs sto_long |branch if move_out long - - cmpil #4,%d0 |check for word - beqs sto_word |branch if move_out word - - cmpil #6,%d0 |check for byte - beqs sto_byte |branch if move_out byte - -| -| Not byte, word or long -| - rts -| -| Get the 32 most significant bits of etemp mantissa -| -sto_long: - movel ETEMP_HI(%a6),%d1 - movel #4,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |load src addr of snan into a0 - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off 
stack - rts -| -| Get the 16 most significant bits of etemp mantissa -| -sto_word: - movel ETEMP_HI(%a6),%d1 - movel #2,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |point to low word - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off stack - rts -| -| Get the 8 most significant bits of etemp mantissa -| -sto_byte: - movel ETEMP_HI(%a6),%d1 - movel #1,%d0 |load byte count -| -| Set signalling nan bit -| - bsetl #30,%d1 -| -| Store to the users destination address -| - tstl %a0 |check if is 0 - beqs wrt_dn |destination is a data register - movel %d1,-(%a7) |move the snan onto the stack - movel %a0,%a1 |load dest addr into a1 - movel %a7,%a0 |point to source byte - bsrl mem_write |write snan to user memory - movel (%a7)+,%d1 |clear off stack - rts - -| -| wrt_dn --- write to a data register -| -| We get here with D1 containing the data to write and D0 the -| number of bytes to write: 1=byte,2=word,4=long. -| -wrt_dn: - movel %d1,L_SCR1(%a6) |data - movel %d0,-(%a7) |size - bsrl get_fline |returns fline word in d0 - movel %d0,%d1 - andil #0x7,%d1 |d1 now holds register number - movel (%sp)+,%d0 |get original size - cmpil #4,%d0 - beqs wrt_long - cmpil #2,%d0 - bnes wrt_byte -wrt_word: - orl #0x8,%d1 - bral reg_dest -wrt_long: - orl #0x10,%d1 - bral reg_dest -wrt_byte: - bral reg_dest -| -| Check if it is a src nan or dst nan -| -not_out: - movel DTAG(%a6),%d0 - bfextu %d0{#0:#3},%d0 |isolate dtag in lsbs - - cmpib #3,%d0 |check for nan in destination - bnes issrc |destination nan has priority -dst_nan: - btstb #6,FPTEMP_HI(%a6) |check if dest nan is an snan - bnes issrc |no, so check source for snan - movew FPTEMP_EX(%a6),%d0 - bras cont -issrc: - movew ETEMP_EX(%a6),%d0 -cont: - btstl #15,%d0 |test for sign of snan - beqs clr_neg - bsetb #neg_bit,FPSR_CC(%a6) - bra report_snan -clr_neg: - bclrb #neg_bit,FPSR_CC(%a6) - bra report_snan - - |end diff --git a/arch/m68k/fpsp040/x_store.S b/arch/m68k/fpsp040/x_store.S deleted file mode 100644 index 402dc0c0ebc04d814b69f55636ae1320bf24b747..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_store.S +++ /dev/null @@ -1,255 +0,0 @@ -| -| x_store.sa 3.2 1/24/91 -| -| store --- store operand to memory or register -| -| Used by underflow and overflow handlers. -| -| a6 = points to fp value to be stored. -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_STORE: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -fpreg_mask: - .byte 0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01 - -#include "fpsp.h" - - |xref mem_write - |xref get_fline - |xref g_opcls - |xref g_dfmtou - |xref reg_dest - - .global dest_ext - .global dest_dbl - .global dest_sgl - - .global store -store: - btstb #E3,E_BYTE(%a6) - beqs E1_sto -E3_sto: - movel CMDREG3B(%a6),%d0 - bfextu %d0{#6:#3},%d0 |isolate dest. 
reg from cmdreg3b -sto_fp: - lea fpreg_mask,%a1 - moveb (%a1,%d0.w),%d0 |convert reg# to dynamic register mask - tstb LOCAL_SGN(%a0) - beqs is_pos - bsetb #sign_bit,LOCAL_EX(%a0) -is_pos: - fmovemx (%a0),%d0 |move to correct register -| -| if fp0-fp3 is being modified, we must put a copy -| in the USER_FPn variable on the stack because all exception -| handlers restore fp0-fp3 from there. -| - cmpb #0x80,%d0 - bnes not_fp0 - fmovemx %fp0-%fp0,USER_FP0(%a6) - rts -not_fp0: - cmpb #0x40,%d0 - bnes not_fp1 - fmovemx %fp1-%fp1,USER_FP1(%a6) - rts -not_fp1: - cmpb #0x20,%d0 - bnes not_fp2 - fmovemx %fp2-%fp2,USER_FP2(%a6) - rts -not_fp2: - cmpb #0x10,%d0 - bnes not_fp3 - fmovemx %fp3-%fp3,USER_FP3(%a6) - rts -not_fp3: - rts - -E1_sto: - bsrl g_opcls |returns opclass in d0 - cmpib #3,%d0 - beq opc011 |branch if opclass 3 - movel CMDREG1B(%a6),%d0 - bfextu %d0{#6:#3},%d0 |extract destination register - bras sto_fp - -opc011: - bsrl g_dfmtou |returns dest format in d0 -| ;ext=00, sgl=01, dbl=10 - movel %a0,%a1 |save source addr in a1 - movel EXC_EA(%a6),%a0 |get the address - cmpil #0,%d0 |if dest format is extended - beq dest_ext |then branch - cmpil #1,%d0 |if dest format is single - beq dest_sgl |then branch -| -| fall through to dest_dbl -| - -| -| dest_dbl --- write double precision value to user space -| -|Input -| a0 -> destination address -| a1 -> source in extended precision -|Output -| a0 -> destroyed -| a1 -> destroyed -| d0 -> 0 -| -|Changes extended precision to double precision. -| Note: no attempt is made to round the extended value to double. -| dbl_sign = ext_sign -| dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) -| get rid of ext integer bit -| dbl_mant = ext_mant{62:12} -| -| --------------- --------------- --------------- -| extended -> |s| exp | |1| ms mant | | ls mant | -| --------------- --------------- --------------- -| 95 64 63 62 32 31 11 0 -| | | -| | | -| | | -| v v -| --------------- --------------- -| double -> |s|exp| mant | | mant | -| --------------- --------------- -| 63 51 32 31 0 -| -dest_dbl: - clrl %d0 |clear d0 - movew LOCAL_EX(%a1),%d0 |get exponent - subw #0x3fff,%d0 |subtract extended precision bias - cmpw #0x4000,%d0 |check if inf - beqs inf |if so, special case - addw #0x3ff,%d0 |add double precision bias - swap %d0 |d0 now in upper word - lsll #4,%d0 |d0 now in proper place for dbl prec exp - tstb LOCAL_SGN(%a1) - beqs get_mant |if positive, go process mantissa - bsetl #31,%d0 |if negative, put in sign information -| ; before continuing - bras get_mant |go process mantissa -inf: - movel #0x7ff00000,%d0 |load dbl inf exponent - clrl LOCAL_HI(%a1) |clear msb - tstb LOCAL_SGN(%a1) - beqs dbl_inf |if positive, go ahead and write it - bsetl #31,%d0 |if negative put in sign information -dbl_inf: - movel %d0,LOCAL_EX(%a1) |put the new exp back on the stack - bras dbl_wrt -get_mant: - movel LOCAL_HI(%a1),%d1 |get ms mantissa - bfextu %d1{#1:#20},%d1 |get upper 20 bits of ms - orl %d1,%d0 |put these bits in ms word of double - movel %d0,LOCAL_EX(%a1) |put the new exp back on the stack - movel LOCAL_HI(%a1),%d1 |get ms mantissa - movel #21,%d0 |load shift count - lsll %d0,%d1 |put lower 11 bits in upper bits - movel %d1,LOCAL_HI(%a1) |build lower lword in memory - movel LOCAL_LO(%a1),%d1 |get ls mantissa - bfextu %d1{#0:#21},%d0 |get ls 21 bits of double - orl %d0,LOCAL_HI(%a1) |put them in double result -dbl_wrt: - movel #0x8,%d0 |byte count for double precision number - exg %a0,%a1 |a0=supervisor source, a1=user dest - bsrl mem_write |move the number to 
the user's memory - rts -| -| dest_sgl --- write single precision value to user space -| -|Input -| a0 -> destination address -| a1 -> source in extended precision -| -|Output -| a0 -> destroyed -| a1 -> destroyed -| d0 -> 0 -| -|Changes extended precision to single precision. -| sgl_sign = ext_sign -| sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) -| get rid of ext integer bit -| sgl_mant = ext_mant{62:12} -| -| --------------- --------------- --------------- -| extended -> |s| exp | |1| ms mant | | ls mant | -| --------------- --------------- --------------- -| 95 64 63 62 40 32 31 12 0 -| | | -| | | -| | | -| v v -| --------------- -| single -> |s|exp| mant | -| --------------- -| 31 22 0 -| -dest_sgl: - clrl %d0 - movew LOCAL_EX(%a1),%d0 |get exponent - subw #0x3fff,%d0 |subtract extended precision bias - cmpw #0x4000,%d0 |check if inf - beqs sinf |if so, special case - addw #0x7f,%d0 |add single precision bias - swap %d0 |put exp in upper word of d0 - lsll #7,%d0 |shift it into single exp bits - tstb LOCAL_SGN(%a1) - beqs get_sman |if positive, continue - bsetl #31,%d0 |if negative, put in sign first - bras get_sman |get mantissa -sinf: - movel #0x7f800000,%d0 |load single inf exp to d0 - tstb LOCAL_SGN(%a1) - beqs sgl_wrt |if positive, continue - bsetl #31,%d0 |if negative, put in sign info - bras sgl_wrt - -get_sman: - movel LOCAL_HI(%a1),%d1 |get ms mantissa - bfextu %d1{#1:#23},%d1 |get upper 23 bits of ms - orl %d1,%d0 |put these bits in ms word of single - -sgl_wrt: - movel %d0,L_SCR1(%a6) |put the new exp back on the stack - movel #0x4,%d0 |byte count for single precision number - tstl %a0 |users destination address - beqs sgl_Dn |destination is a data register - exg %a0,%a1 |a0=supervisor source, a1=user dest - leal L_SCR1(%a6),%a0 |point a0 to data - bsrl mem_write |move the number to the user's memory - rts -sgl_Dn: - bsrl get_fline |returns fline word in d0 - andw #0x7,%d0 |isolate register number - movel %d0,%d1 |d1 has size:reg formatted for reg_dest - orl #0x10,%d1 |reg_dest wants size added to reg# - bral reg_dest |size is X, rts in reg_dest will -| ;return to caller of dest_sgl - -dest_ext: - tstb LOCAL_SGN(%a1) |put back sign into exponent word - beqs dstx_cont - bsetb #sign_bit,LOCAL_EX(%a1) -dstx_cont: - clrb LOCAL_SGN(%a1) |clear out the sign byte - - movel #0x0c,%d0 |byte count for extended number - exg %a0,%a1 |a0=supervisor source, a1=user dest - bsrl mem_write |move the number to the user's memory - rts - - |end diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S deleted file mode 100644 index eb772ff3b812c71747de877f3a815b81300454cb..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unfl.S +++ /dev/null @@ -1,268 +0,0 @@ -| -| x_unfl.sa 3.4 7/1/91 -| -| fpsp_unfl --- FPSP handler for underflow exception -| -| Trap disabled results -| For 881/2 compatibility, sw must denormalize the intermediate -| result, then store the result. Denormalization is accomplished -| by taking the intermediate result (which is always normalized) and -| shifting the mantissa right while incrementing the exponent until -| it is equal to the denormalized exponent for the destination -| format. After denormalization, the result is rounded to the -| destination format. -| -| Trap enabled results -| All trap disabled code applies. In addition the exceptional -| operand needs to made available to the user with a bias of $6000 -| added to the exponent. -| - -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref denorm - |xref round - |xref store - |xref g_rndpr - |xref g_opcls - |xref g_dfmtou - |xref real_unfl - |xref real_inex - |xref fpsp_done - |xref b1238_fix - - .global fpsp_unfl -fpsp_unfl: - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - -| - bsrl unf_res |denormalize, round & store interm op -| -| If underflow exceptions are not enabled, check for inexact -| exception -| - btstb #unfl_bit,FPCR_ENABLE(%a6) - beqs ck_inex - - btstb #E3,E_BYTE(%a6) - beqs no_e3_1 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_1: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_unfl -| -| It is possible to have either inex2 or inex1 exceptions with the -| unfl. If the inex enable bit is set in the FPCR, and either -| inex2 or inex1 occurred, we must clean up and branch to the -| real inex handler. -| -ck_inex: - moveb FPCR_ENABLE(%a6),%d0 - andb FPSR_EXCEPT(%a6),%d0 - andib #0x3,%d0 - beqs unfl_done - -| -| Inexact enabled and reported, and we must take an inexact exception -| -take_inex: - btstb #E3,E_BYTE(%a6) - beqs no_e3_2 -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) -no_e3_2: - moveb #INEX_VEC,EXC_VEC+1(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral real_inex - -unfl_done: - bclrb #E3,E_BYTE(%a6) - beqs e1_set |if set then branch -| -| Clear dirty bit on dest resister in the frame before branching -| to b1238_fix. -| - bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no - bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit - bsrl b1238_fix |test for bug1238 case - movel USER_FPSR(%a6),FPSR_SHADOW(%a6) - orl #sx_mask,E_BYTE(%a6) - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - frestore (%a7)+ - unlk %a6 - bral fpsp_done -e1_set: - moveml USER_DA(%a6),%d0-%d1/%a0-%a1 - fmovemx USER_FP0(%a6),%fp0-%fp3 - fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar - unlk %a6 - bral fpsp_done -| -| unf_res --- underflow result calculation -| -unf_res: - bsrl g_rndpr |returns RND_PREC in d0 0=ext, -| ;1=sgl, 2=dbl -| ;we need the RND_PREC in the -| ;upper word for round - movew #0,-(%a7) - movew %d0,-(%a7) |copy RND_PREC to stack -| -| -| If the exception bit set is E3, the exceptional operand from the -| fpu is in WBTEMP; else it is in FPTEMP. -| - btstb #E3,E_BYTE(%a6) - beqs unf_E1 -unf_E3: - lea WBTEMP(%a6),%a0 |a0 now points to operand -| -| Test for fsgldiv and fsglmul. If the inst was one of these, then -| force the precision to extended for the denorm routine. 
Use -| the user's precision for the round routine. -| - movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul - andiw #0x7f,%d1 - cmpiw #0x30,%d1 |check for sgldiv - beqs unf_sgl - cmpiw #0x33,%d1 |check for sglmul - bnes unf_cont |if not, use fpcr prec in round -unf_sgl: - clrl %d0 - movew #0x1,(%a7) |override g_rndpr precision -| ;force single - bras unf_cont -unf_E1: - lea FPTEMP(%a6),%a0 |a0 now points to operand -unf_cont: - bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit - sne LOCAL_SGN(%a0) |store sign - - bsrl denorm |returns denorm, a0 points to it -| -| WARNING: -| ;d0 has guard,round sticky bit -| ;make sure that it is not corrupted -| ;before it reaches the round subroutine -| ;also ensure that a0 isn't corrupted - -| -| Set up d1 for round subroutine d1 contains the PREC/MODE -| information respectively on upper/lower register halves. -| - bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR -| ;mode in lower d1 - addl (%a7)+,%d1 |merge PREC/MODE -| -| WARNING: a0 and d0 are assumed to be intact between the denorm and -| round subroutines. All code between these two subroutines -| must not corrupt a0 and d0. -| -| -| Perform Round -| Input: a0 points to input operand -| d0{31:29} has guard, round, sticky -| d1{01:00} has rounding mode -| d1{17:16} has rounding precision -| Output: a0 points to rounded operand -| - - bsrl round |returns rounded denorm at (a0) -| -| Differentiate between store to memory vs. store to register -| -unf_store: - bsrl g_opcls |returns opclass in d0{2:0} - cmpib #0x3,%d0 - bnes not_opc011 -| -| At this point, a store to memory is pending -| -opc011: - bsrl g_dfmtou - tstb %d0 - beqs ext_opc011 |If extended, do not subtract -| ;If destination format is sgl/dbl, - tstb LOCAL_HI(%a0) |If rounded result is normal,don't -| ;subtract - bmis ext_opc011 - subqw #1,LOCAL_EX(%a0) |account for denorm bias vs. -| ;normalized bias -| ; normalized denormalized -| ;single $7f $7e -| ;double $3ff $3fe -| -ext_opc011: - bsrl store |stores to memory - bras unf_done |finish up - -| -| At this point, a store to a float register is pending -| -not_opc011: - bsrl store |stores to float register -| ;a0 is not corrupted on a store to a -| ;float register. -| -| Set the condition codes according to result -| - tstl LOCAL_HI(%a0) |check upper mantissa - bnes ck_sgn - tstl LOCAL_LO(%a0) |check lower mantissa - bnes ck_sgn - bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero -ck_sgn: - btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit - beqs unf_done - bsetb #neg_bit,FPSR_CC(%a6) - -| -| Finish. -| -unf_done: - btstb #inex2_bit,FPSR_EXCEPT(%a6) - beqs no_aunfl - bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) -no_aunfl: - rts - - |end diff --git a/arch/m68k/fpsp040/x_unimp.S b/arch/m68k/fpsp040/x_unimp.S deleted file mode 100644 index 6f382b21228b68c568e63c8567c7ca27873d78ae..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unimp.S +++ /dev/null @@ -1,76 +0,0 @@ -| -| x_unimp.sa 3.3 7/1/91 -| -| fpsp_unimp --- FPSP handler for unimplemented instruction -| exception. -| -| Invoked when the user program encounters a floating-point -| op-code that hardware does not support. Trap vector# 11 -| (See table 8-1 MC68030 User's Manual). -| -| -| Note: An fsave for an unimplemented inst. will create a short -| fsave stack. -| -| Input: 1. Six word stack frame for unimplemented inst, four word -| for illegal -| (See table 8-7 MC68030 User's Manual). -| 2. Unimp (short) fsave state frame created here by fsave -| instruction. -| -| -| Copyright (C) Motorola, Inc. 
1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNIMP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_op - |xref do_func - |xref sto_res - |xref gen_except - |xref fpsp_fmt_error - - .global fpsp_unimp - .global uni_2 -fpsp_unimp: - link %a6,#-LOCAL_SIZE - fsave -(%a7) -uni_2: - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - moveb (%a7),%d0 |test for valid version num - andib #0xf0,%d0 |test for $4x - cmpib #VER_4,%d0 |must be $4x or exit - bnel fpsp_fmt_error -| -| Temporary D25B Fix -| The following lines are used to ensure that the FPSR -| exception byte and condition codes are clear before proceeding -| - movel USER_FPSR(%a6),%d0 - andl #0xFF00FF,%d0 |clear all but accrued exceptions - movel %d0,USER_FPSR(%a6) - fmovel #0,%FPSR |clear all user bits - fmovel #0,%FPCR |clear all user exceptions for FPSP - - clrb UFLG_TMP(%a6) |clr flag for unsupp data - - bsrl get_op |go get operand(s) - clrb STORE_FLG(%a6) - bsrl do_func |do the function - fsave -(%a7) |capture possible exc state - tstb STORE_FLG(%a6) - bnes no_store |if STORE_FLG is set, no store - bsrl sto_res |store the result in user space -no_store: - bral gen_except |post any exceptions and return - - |end diff --git a/arch/m68k/fpsp040/x_unsupp.S b/arch/m68k/fpsp040/x_unsupp.S deleted file mode 100644 index d7cf46208c62900833e892c1e09f9d3784de7b3d..0000000000000000000000000000000000000000 --- a/arch/m68k/fpsp040/x_unsupp.S +++ /dev/null @@ -1,82 +0,0 @@ -| -| x_unsupp.sa 3.3 7/1/91 -| -| fpsp_unsupp --- FPSP handler for unsupported data type exception -| -| Trap vector #55 (See table 8-1 Mc68030 User's manual). -| Invoked when the user program encounters a data format (packed) that -| hardware does not support or a data type (denormalized numbers or un- -| normalized numbers). -| Normalizes denorms and unnorms, unpacks packed numbers then stores -| them back into the machine to let the 040 finish the operation. -| -| Unsupp calls two routines: -| 1. get_op - gets the operand(s) -| 2. res_func - restore the function back into the 040 or -| if fmove.p fpm, then pack source (fpm) -| and store in users memory . -| -| Input: Long fsave stack frame -| -| - -| Copyright (C) Motorola, Inc. 1990 -| All Rights Reserved -| -| For details on the license for this file, please see the -| file, README, in this same directory. - -X_UNSUPP: |idnt 2,1 | Motorola 040 Floating Point Software Package - - |section 8 - -#include "fpsp.h" - - |xref get_op - |xref res_func - |xref gen_except - |xref fpsp_fmt_error - - .global fpsp_unsupp -fpsp_unsupp: -| - link %a6,#-LOCAL_SIZE - fsave -(%a7) - moveml %d0-%d1/%a0-%a1,USER_DA(%a6) - fmovemx %fp0-%fp3,USER_FP0(%a6) - fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) - - - moveb (%a7),VER_TMP(%a6) |save version number - moveb (%a7),%d0 |test for valid version num - andib #0xf0,%d0 |test for $4x - cmpib #VER_4,%d0 |must be $4x or exit - bnel fpsp_fmt_error - - fmovel #0,%FPSR |clear all user status bits - fmovel #0,%FPCR |clear all user control bits -| -| The following lines are used to ensure that the FPSR -| exception byte and condition codes are clear before proceeding, -| except in the case of fmove, which leaves the cc's intact. 
-| -unsupp_con: - movel USER_FPSR(%a6),%d1 - btst #5,CMDREG1B(%a6) |looking for fmove out - bne fmove_con - andl #0xFF00FF,%d1 |clear all but aexcs and qbyte - bras end_fix -fmove_con: - andl #0x0FFF40FF,%d1 |clear all but cc's, snan bit, aexcs, and qbyte -end_fix: - movel %d1,USER_FPSR(%a6) - - st UFLG_TMP(%a6) |set flag for unsupp data - - bsrl get_op |everything okay, go get operand(s) - bsrl res_func |fix up stack frame so can restore it - clrl -(%a7) - moveb VER_TMP(%a6),(%a7) |move idle fmt word to top of stack - bral gen_except -| - |end diff --git a/arch/m68k/hp300/reboot.S b/arch/m68k/hp300/reboot.S deleted file mode 100644 index 52eb852e6b0444618c2719bcd4843bbb2405656f..0000000000000000000000000000000000000000 --- a/arch/m68k/hp300/reboot.S +++ /dev/null @@ -1,16 +0,0 @@ -/* - * linux/arch/m68k/hp300/reboot.S - * - * Copyright (C) 1998 Philip Blundell - * - * Do the dirty work of rebooting the machine. Basically we need to undo all the - * good stuff that head.S did when we started up. The caches and MMU must be - * disabled and then we jump back to the PROM. This is a bit gruesome but we put - * a brave face on it. - */ - -/* XXX Doesn't work yet. Not sure why and can't be bothered to fix it at the moment. */ - - .globl hp300_reset -hp300_reset: - jmp hp300_reset diff --git a/arch/m68k/ifpsp060/fskeleton.S b/arch/m68k/ifpsp060/fskeleton.S deleted file mode 100644 index 0a1ae4f44130f4a1a8ed502314431f68d51eb360..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/fskeleton.S +++ /dev/null @@ -1,342 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. -| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. 
-|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| fskeleton.s -| -| This file contains: -| (1) example "Call-out"s -| (2) example package entry code -| (3) example "Call-out" table -| - -#include - -|################################ -| (1) EXAMPLE CALL-OUTS # -| # -| _060_fpsp_done() # -| _060_real_ovfl() # -| _060_real_unfl() # -| _060_real_operr() # -| _060_real_snan() # -| _060_real_dz() # -| _060_real_inex() # -| _060_real_bsun() # -| _060_real_fline() # -| _060_real_fpu_disabled() # -| _060_real_trap() # -|################################ - -| -| _060_fpsp_done(): -| -| This is the main exit point for the 68060 Floating-Point -| Software Package. For a normal exit, all 060FPSP routines call this -| routine. The operating system can do system dependent clean-up or -| simply execute an "rte" as with the sample code below. -| - .global _060_fpsp_done -_060_fpsp_done: - bral _060_isp_done | do the same as isp_done - -| -| _060_real_ovfl(): -| -| This is the exit point for the 060FPSP when an enabled overflow exception -| is present. The routine below should point to the operating system handler -| for enabled overflow conditions. The exception stack frame is an overflow -| stack frame. The FP state frame holds the EXCEPTIONAL OPERAND. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_ovfl -_060_real_ovfl: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - - -| -| _060_real_unfl(): -| -| This is the exit point for the 060FPSP when an enabled underflow exception -| is present. The routine below should point to the operating system handler -| for enabled underflow conditions. The exception stack frame is an underflow -| stack frame. The FP state frame holds the EXCEPTIONAL OPERAND. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_unfl -_060_real_unfl: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_operr(): -| -| This is the exit point for the 060FPSP when an enabled operand error exception -| is present. The routine below should point to the operating system handler -| for enabled operand error exceptions. The exception stack frame is an operand error -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_operr -_060_real_operr: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_snan(): -| -| This is the exit point for the 060FPSP when an enabled signalling NaN exception -| is present. The routine below should point to the operating system handler -| for enabled signalling NaN exceptions. The exception stack frame is a signalling NaN -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_snan -_060_real_snan: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_dz(): -| -| This is the exit point for the 060FPSP when an enabled divide-by-zero exception -| is present. The routine below should point to the operating system handler -| for enabled divide-by-zero exceptions. 
The exception stack frame is a divide-by-zero -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_dz -_060_real_dz: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_inex(): -| -| This is the exit point for the 060FPSP when an enabled inexact exception -| is present. The routine below should point to the operating system handler -| for enabled inexact exceptions. The exception stack frame is an inexact -| stack frame. The FP state frame holds the source operand of the faulting -| instruction. -| -| The sample routine below simply clears the exception status bit and -| does an "rte". -| - .global _060_real_inex -_060_real_inex: - fsave -(%sp) - move.w #0x6000,0x2(%sp) - frestore (%sp)+ - bral trap | jump to trap handler - -| -| _060_real_bsun(): -| -| This is the exit point for the 060FPSP when an enabled bsun exception -| is present. The routine below should point to the operating system handler -| for enabled bsun exceptions. The exception stack frame is a bsun -| stack frame. -| -| The sample routine below clears the exception status bit, clears the NaN -| bit in the FPSR, and does an "rte". The instruction that caused the -| bsun will now be re-executed but with the NaN FPSR bit cleared. -| - .global _060_real_bsun -_060_real_bsun: -| fsave -(%sp) - - fmove.l %fpsr,-(%sp) - andi.b #0xfe,(%sp) - fmove.l (%sp)+,%fpsr - - bral trap | jump to trap handler - -| -| _060_real_fline(): -| -| This is the exit point for the 060FPSP when an F-Line Illegal exception is -| encountered. Three different types of exceptions can enter the F-Line exception -| vector number 11: FP Unimplemented Instructions, FP implemented instructions when -| the FPU is disabled, and F-Line Illegal instructions. The 060FPSP module -| _fpsp_fline() distinguishes between the three and acts appropriately. F-Line -| Illegals branch here. -| - .global _060_real_fline -_060_real_fline: - bral trap | jump to trap handler - -| -| _060_real_fpu_disabled(): -| -| This is the exit point for the 060FPSP when an FPU disabled exception is -| encountered. Three different types of exceptions can enter the F-Line exception -| vector number 11: FP Unimplemented Instructions, FP implemented instructions when -| the FPU is disabled, and F-Line Illegal instructions. The 060FPSP module -| _fpsp_fline() distinguishes between the three and acts appropriately. FPU disabled -| exceptions branch here. -| -| The sample code below enables the FPU, sets the PC field in the exception stack -| frame to the PC of the instruction causing the exception, and does an "rte". -| The execution of the instruction then proceeds with an enabled floating-point -| unit. -| - .global _060_real_fpu_disabled -_060_real_fpu_disabled: - move.l %d0,-(%sp) | enabled the fpu - .long 0x4E7A0808 |movec pcr,%d0 - bclr #0x1,%d0 - .long 0x4E7B0808 |movec %d0,pcr - move.l (%sp)+,%d0 - - move.l 0xc(%sp),0x2(%sp) | set "Current PC" - rte - -| -| _060_real_trap(): -| -| This is the exit point for the 060FPSP when an emulated "ftrapcc" instruction -| discovers that the trap condition is true and it should branch to the operating -| system handler for the trap exception vector number 7. -| -| The sample code below simply executes an "rte". 
-| - .global _060_real_trap -_060_real_trap: - bral trap | jump to trap handler - -|############################################################################ - -|################################# -| (2) EXAMPLE PACKAGE ENTRY CODE # -|################################# - - .global _060_fpsp_snan -_060_fpsp_snan: - bra.l _FP_CALL_TOP+0x80+0x00 - - .global _060_fpsp_operr -_060_fpsp_operr: - bra.l _FP_CALL_TOP+0x80+0x08 - - .global _060_fpsp_ovfl -_060_fpsp_ovfl: - bra.l _FP_CALL_TOP+0x80+0x10 - - .global _060_fpsp_unfl -_060_fpsp_unfl: - bra.l _FP_CALL_TOP+0x80+0x18 - - .global _060_fpsp_dz -_060_fpsp_dz: - bra.l _FP_CALL_TOP+0x80+0x20 - - .global _060_fpsp_inex -_060_fpsp_inex: - bra.l _FP_CALL_TOP+0x80+0x28 - - .global _060_fpsp_fline -_060_fpsp_fline: - bra.l _FP_CALL_TOP+0x80+0x30 - - .global _060_fpsp_unsupp -_060_fpsp_unsupp: - bra.l _FP_CALL_TOP+0x80+0x38 - - .global _060_fpsp_effadd -_060_fpsp_effadd: - bra.l _FP_CALL_TOP+0x80+0x40 - -|############################################################################ - -|############################### -| (3) EXAMPLE CALL-OUT SECTION # -|############################### - -| The size of this section MUST be 128 bytes!!! - -_FP_CALL_TOP: - .long _060_real_bsun - _FP_CALL_TOP - .long _060_real_snan - _FP_CALL_TOP - .long _060_real_operr - _FP_CALL_TOP - .long _060_real_ovfl - _FP_CALL_TOP - .long _060_real_unfl - _FP_CALL_TOP - .long _060_real_dz - _FP_CALL_TOP - .long _060_real_inex - _FP_CALL_TOP - .long _060_real_fline - _FP_CALL_TOP - .long _060_real_fpu_disabled - _FP_CALL_TOP - .long _060_real_trap - _FP_CALL_TOP - .long _060_real_trace - _FP_CALL_TOP - .long _060_real_access - _FP_CALL_TOP - .long _060_fpsp_done - _FP_CALL_TOP - - .long 0x00000000, 0x00000000, 0x00000000 - - .long _060_imem_read - _FP_CALL_TOP - .long _060_dmem_read - _FP_CALL_TOP - .long _060_dmem_write - _FP_CALL_TOP - .long _060_imem_read_word - _FP_CALL_TOP - .long _060_imem_read_long - _FP_CALL_TOP - .long _060_dmem_read_byte - _FP_CALL_TOP - .long _060_dmem_read_word - _FP_CALL_TOP - .long _060_dmem_read_long - _FP_CALL_TOP - .long _060_dmem_write_byte - _FP_CALL_TOP - .long _060_dmem_write_word - _FP_CALL_TOP - .long _060_dmem_write_long - _FP_CALL_TOP - - .long 0x00000000 - - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -|############################################################################ - -| 060 FPSP KERNEL PACKAGE NEEDS TO GO HERE!!! - -#include "fpsp.sa" diff --git a/arch/m68k/ifpsp060/iskeleton.S b/arch/m68k/ifpsp060/iskeleton.S deleted file mode 100644 index 91a9c65fee8a9568b1bd1be5256b767d31476d9f..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/iskeleton.S +++ /dev/null @@ -1,347 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 
-| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| iskeleton.s -| -| This file contains: -| (1) example "Call-out"s -| (2) example package entry code -| (3) example "Call-out" table -| - -#include -#include -#include - - -|################################ -| (1) EXAMPLE CALL-OUTS # -| # -| _060_isp_done() # -| _060_real_chk() # -| _060_real_divbyzero() # -| # -| _060_real_cas() # -| _060_real_cas2() # -| _060_real_lock_page() # -| _060_real_unlock_page() # -|################################ - -| -| _060_isp_done(): -| -| This is and example main exit point for the Unimplemented Integer -| Instruction exception handler. For a normal exit, the -| _isp_unimp() branches to here so that the operating system -| can do any clean-up desired. The stack frame is the -| Unimplemented Integer Instruction stack frame with -| the PC pointing to the instruction following the instruction -| just emulated. -| To simply continue execution at the next instruction, just -| do an "rte". -| -| Linux/68k: If returning to user space, check for needed reselections. - - .global _060_isp_done -_060_isp_done: - btst #0x5,%sp@ | supervisor bit set in saved SR? - beq .Lnotkern - rte -.Lnotkern: - SAVE_ALL_INT - GET_CURRENT(%d0) - | deliver signals, reschedule etc.. - jra ret_from_exception - -| -| _060_real_chk(): -| -| This is an alternate exit point for the Unimplemented Integer -| Instruction exception handler. If the instruction was a "chk2" -| and the operand was out of bounds, then _isp_unimp() creates -| a CHK exception stack frame from the Unimplemented Integer Instrcution -| stack frame and branches to this routine. -| -| Linux/68k: commented out test for tracing - - .global _060_real_chk -_060_real_chk: -| tst.b (%sp) | is tracing enabled? -| bpls real_chk_end | no - -| -| CHK FRAME TRACE FRAME -| ***************** ***************** -| * Current PC * * Current PC * -| ***************** ***************** -| * 0x2 * 0x018 * * 0x2 * 0x024 * -| ***************** ***************** -| * Next * * Next * -| * PC * * PC * -| ***************** ***************** -| * SR * * SR * -| ***************** ***************** -| -| move.b #0x24,0x7(%sp) | set trace vecno -| bral _060_real_trace - -real_chk_end: - bral trap | jump to trap handler - -| -| _060_real_divbyzero: -| -| This is an alternate exit point for the Unimplemented Integer -| Instruction exception handler isp_unimp(). If the instruction is a 64-bit -| integer divide where the source operand is a zero, then the _isp_unimp() -| creates a Divide-by-zero exception stack frame from the Unimplemented -| Integer Instruction stack frame and branches to this routine. -| -| Remember that a trace exception may be pending. 
The code below performs -| no action associated with the "chk" exception. If tracing is enabled, -| then it create a Trace exception stack frame from the "chk" exception -| stack frame and branches to the _real_trace() entry point. -| -| Linux/68k: commented out test for tracing - - .global _060_real_divbyzero -_060_real_divbyzero: -| tst.b (%sp) | is tracing enabled? -| bpls real_divbyzero_end | no - -| -| DIVBYZERO FRAME TRACE FRAME -| ***************** ***************** -| * Current PC * * Current PC * -| ***************** ***************** -| * 0x2 * 0x014 * * 0x2 * 0x024 * -| ***************** ***************** -| * Next * * Next * -| * PC * * PC * -| ***************** ***************** -| * SR * * SR * -| ***************** ***************** -| -| move.b #0x24,0x7(%sp) | set trace vecno -| bral _060_real_trace - -real_divbyzero_end: - bral trap | jump to trap handler - -|########################## - -| -| _060_real_cas(): -| -| Entry point for the selected cas emulation code implementation. -| If the implementation provided by the 68060ISP is sufficient, -| then this routine simply re-enters the package through _isp_cas. -| - .global _060_real_cas -_060_real_cas: - bral _I_CALL_TOP+0x80+0x08 - -| -| _060_real_cas2(): -| -| Entry point for the selected cas2 emulation code implementation. -| If the implementation provided by the 68060ISP is sufficient, -| then this routine simply re-enters the package through _isp_cas2. -| - .global _060_real_cas2 -_060_real_cas2: - bral _I_CALL_TOP+0x80+0x10 - -| -| _060_lock_page(): -| -| Entry point for the operating system`s routine to "lock" a page -| from being paged out. This routine is needed by the cas/cas2 -| algorithms so that no page faults occur within the "core" code -| region. Note: the routine must lock two pages if the operand -| spans two pages. -| NOTE: THE ROUTINE SHOULD RETURN AN FSLW VALUE IN D0 ON FAILURE -| SO THAT THE 060SP CAN CREATE A PROPER ACCESS ERROR FRAME. -| Arguments: -| a0 = operand address -| d0 = `xxxxxxff -> supervisor; `xxxxxx00 -> user -| d1 = `xxxxxxff -> longword; `xxxxxx00 -> word -| Expected outputs: -| d0 = 0 -> success; non-zero -> failure -| -| Linux/m68k: Make sure the page is properly paged in, so we use -| plpaw and handle any exception here. The kernel must not be -| preempted until _060_unlock_page(), so that the page stays mapped. -| - .global _060_real_lock_page -_060_real_lock_page: - move.l %d2,-(%sp) - | load sfc/dfc - tst.b %d0 - jne 1f - moveq #1,%d0 - jra 2f -1: moveq #5,%d0 -2: movec.l %dfc,%d2 - movec.l %d0,%dfc - movec.l %d0,%sfc - - clr.l %d0 - | prefetch address - .chip 68060 - move.l %a0,%a1 -1: plpaw (%a1) - addq.w #1,%a0 - tst.b %d1 - jeq 2f - addq.w #2,%a0 -2: plpaw (%a0) -3: .chip 68k - - | restore sfc/dfc - movec.l %d2,%dfc - movec.l %d2,%sfc - move.l (%sp)+,%d2 - rts - -.section __ex_table,"a" - .align 4 - .long 1b,11f - .long 2b,21f -.previous -.section .fixup,"ax" - .even -11: move.l #0x020003c0,%d0 - or.l %d2,%d0 - swap %d0 - jra 3b -21: move.l #0x02000bc0,%d0 - or.l %d2,%d0 - swap %d0 - jra 3b -.previous - -| -| _060_unlock_page(): -| -| Entry point for the operating system`s routine to "unlock" a -| page that has been "locked" previously with _real_lock_page. -| Note: the routine must unlock two pages if the operand spans -| two pages. -| Arguments: -| a0 = operand address -| d0 = `xxxxxxff -> supervisor; `xxxxxx00 -> user -| d1 = `xxxxxxff -> longword; `xxxxxx00 -> word -| -| Linux/m68k: perhaps reenable preemption here... 
- - .global _060_real_unlock_page -_060_real_unlock_page: - clr.l %d0 - rts - -|########################################################################### - -|################################# -| (2) EXAMPLE PACKAGE ENTRY CODE # -|################################# - - .global _060_isp_unimp -_060_isp_unimp: - bral _I_CALL_TOP+0x80+0x00 - - .global _060_isp_cas -_060_isp_cas: - bral _I_CALL_TOP+0x80+0x08 - - .global _060_isp_cas2 -_060_isp_cas2: - bral _I_CALL_TOP+0x80+0x10 - - .global _060_isp_cas_finish -_060_isp_cas_finish: - bra.l _I_CALL_TOP+0x80+0x18 - - .global _060_isp_cas2_finish -_060_isp_cas2_finish: - bral _I_CALL_TOP+0x80+0x20 - - .global _060_isp_cas_inrange -_060_isp_cas_inrange: - bral _I_CALL_TOP+0x80+0x28 - - .global _060_isp_cas_terminate -_060_isp_cas_terminate: - bral _I_CALL_TOP+0x80+0x30 - - .global _060_isp_cas_restart -_060_isp_cas_restart: - bral _I_CALL_TOP+0x80+0x38 - -|########################################################################### - -|############################### -| (3) EXAMPLE CALL-OUT SECTION # -|############################### - -| The size of this section MUST be 128 bytes!!! - -_I_CALL_TOP: - .long _060_real_chk - _I_CALL_TOP - .long _060_real_divbyzero - _I_CALL_TOP - .long _060_real_trace - _I_CALL_TOP - .long _060_real_access - _I_CALL_TOP - .long _060_isp_done - _I_CALL_TOP - - .long _060_real_cas - _I_CALL_TOP - .long _060_real_cas2 - _I_CALL_TOP - .long _060_real_lock_page - _I_CALL_TOP - .long _060_real_unlock_page - _I_CALL_TOP - - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - .long 0x00000000, 0x00000000, 0x00000000 - - .long _060_imem_read - _I_CALL_TOP - .long _060_dmem_read - _I_CALL_TOP - .long _060_dmem_write - _I_CALL_TOP - .long _060_imem_read_word - _I_CALL_TOP - .long _060_imem_read_long - _I_CALL_TOP - .long _060_dmem_read_byte - _I_CALL_TOP - .long _060_dmem_read_word - _I_CALL_TOP - .long _060_dmem_read_long - _I_CALL_TOP - .long _060_dmem_write_byte - _I_CALL_TOP - .long _060_dmem_write_word - _I_CALL_TOP - .long _060_dmem_write_long - _I_CALL_TOP - - .long 0x00000000 - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -|########################################################################### - -| 060 INTEGER KERNEL PACKAGE MUST GO HERE!!! -#include "isp.sa" diff --git a/arch/m68k/ifpsp060/os.S b/arch/m68k/ifpsp060/os.S deleted file mode 100644 index 7a0d6e42806656a59f7b5da80f019aed1d1626c6..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/os.S +++ /dev/null @@ -1,396 +0,0 @@ -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -|M68000 Hi-Performance Microprocessor Division -|M68060 Software Package -|Production Release P1.00 -- October 10, 1994 -| -|M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. -| -|THE SOFTWARE is provided on an "AS IS" basis and without warranty. -|To the maximum extent permitted by applicable law, -|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -|and any warranty against infringement with regard to the SOFTWARE -|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 
-| -|To the maximum extent permitted by applicable law, -|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. -| -|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -|so long as this entire notice is retained without alteration in any modified and/or -|redistributed versions, and that such modified versions are clearly identified as such. -|No licenses are granted by implication, estoppel or otherwise under any patents -|or trademarks of Motorola, Inc. -|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -| os.s -| -| This file contains: -| - example "Call-Out"s required by both the ISP and FPSP. -| - -#include - -|################################ -| EXAMPLE CALL-OUTS # -| # -| _060_dmem_write() # -| _060_dmem_read() # -| _060_imem_read() # -| _060_dmem_read_byte() # -| _060_dmem_read_word() # -| _060_dmem_read_long() # -| _060_imem_read_word() # -| _060_imem_read_long() # -| _060_dmem_write_byte() # -| _060_dmem_write_word() # -| _060_dmem_write_long() # -| # -| _060_real_trace() # -| _060_real_access() # -|################################ - -| -| Each IO routine checks to see if the memory write/read is to/from user -| or supervisor application space. The examples below use simple "move" -| instructions for supervisor mode applications and call _copyin()/_copyout() -| for user mode applications. -| When installing the 060SP, the _copyin()/_copyout() equivalents for a -| given operating system should be substituted. -| -| The addresses within the 060SP are guaranteed to be on the stack. -| The result is that Unix processes are allowed to sleep as a consequence -| of a page fault during a _copyout. -| -| Linux/68k: The _060_[id]mem_{read,write}_{byte,word,long} functions -| (i.e. all the known length <= 4) are implemented by single moves -| statements instead of (more expensive) copy{in,out} calls, if -| working in user space - -| -| _060_dmem_write(): -| -| Writes to data memory while in supervisor mode. -| -| INPUTS: -| a0 - supervisor source address -| a1 - user destination address -| d0 - number of bytes to write -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write -_060_dmem_write: - subq.l #1,%d0 - btst #0x5,0x4(%a6) | check for supervisor state - beqs user_write -super_write: - move.b (%a0)+,(%a1)+ | copy 1 byte - dbra %d0,super_write | quit if --ctr < 0 - clr.l %d1 | return success - rts -user_write: - move.b (%a0)+,%d1 | copy 1 byte -copyoutae: - movs.b %d1,(%a1)+ - dbra %d0,user_write | quit if --ctr < 0 - clr.l %d1 | return success - rts - -| -| _060_imem_read(), _060_dmem_read(): -| -| Reads from data/instruction memory while in supervisor mode. 
-| -| INPUTS: -| a0 - user source address -| a1 - supervisor destination address -| d0 - number of bytes to read -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_imem_read - .global _060_dmem_read -_060_imem_read: -_060_dmem_read: - subq.l #1,%d0 - btst #0x5,0x4(%a6) | check for supervisor state - beqs user_read -super_read: - move.b (%a0)+,(%a1)+ | copy 1 byte - dbra %d0,super_read | quit if --ctr < 0 - clr.l %d1 | return success - rts -user_read: -copyinae: - movs.b (%a0)+,%d1 - move.b %d1,(%a1)+ | copy 1 byte - dbra %d0,user_read | quit if --ctr < 0 - clr.l %d1 | return success - rts - -| -| _060_dmem_read_byte(): -| -| Read a data byte from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data byte in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_byte -_060_dmem_read_byte: - clr.l %d0 | clear whole longword - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrbs | supervisor -dmrbuae:movs.b (%a0),%d0 | fetch user byte - rts -dmrbs: move.b (%a0),%d0 | fetch super byte - rts - -| -| _060_dmem_read_word(): -| -| Read a data word from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data word in d0 -| d1 - 0 = success, !0 = failure -| -| _060_imem_read_word(): -| -| Read an instruction word from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - instruction word in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_word - .global _060_imem_read_word -_060_dmem_read_word: -_060_imem_read_word: - clr.l %d1 | assume success - clr.l %d0 | clear whole longword - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrws | supervisor -dmrwuae:movs.w (%a0), %d0 | fetch user word - rts -dmrws: move.w (%a0), %d0 | fetch super word - rts - -| -| _060_dmem_read_long(): -| - -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - data longword in d0 -| d1 - 0 = success, !0 = failure -| -| _060_imem_read_long(): -| -| Read an instruction longword from user memory. -| -| INPUTS: -| a0 - user source address -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d0 - instruction longword in d0 -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_read_long - .global _060_imem_read_long -_060_dmem_read_long: -_060_imem_read_long: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmrls | supervisor -dmrluae:movs.l (%a0),%d0 | fetch user longword - rts -dmrls: move.l (%a0),%d0 | fetch super longword - rts - -| -| _060_dmem_write_byte(): -| -| Write a data byte to user memory. -| -| INPUTS: -| a0 - user destination address -| d0 - data byte in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_byte -_060_dmem_write_byte: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwbs | supervisor -dmwbuae:movs.b %d0,(%a0) | store user byte - rts -dmwbs: move.b %d0,(%a0) | store super byte - rts - -| -| _060_dmem_write_word(): -| -| Write a data word to user memory. 
-| -| INPUTS: -| a0 - user destination address -| d0 - data word in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_word -_060_dmem_write_word: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwws | supervisor -dmwwu: -dmwwuae:movs.w %d0,(%a0) | store user word - bras dmwwr -dmwws: move.w %d0,(%a0) | store super word -dmwwr: clr.l %d1 | return success - rts - -| -| _060_dmem_write_long(): -| -| Write a data longword to user memory. -| -| INPUTS: -| a0 - user destination address -| d0 - data longword in d0 -| 0x4(%a6),bit5 - 1 = supervisor mode, 0 = user mode -| OUTPUTS: -| d1 - 0 = success, !0 = failure -| - .global _060_dmem_write_long -_060_dmem_write_long: - clr.l %d1 | assume success - btst #0x5,0x4(%a6) | check for supervisor state - bnes dmwls | supervisor -dmwluae:movs.l %d0,(%a0) | store user longword - rts -dmwls: move.l %d0,(%a0) | store super longword - rts - - -#if 0 -|############################################### - -| -| Use these routines if your kernel doesn't have _copyout/_copyin equivalents. -| Assumes that D0/D1/A0/A1 are scratch registers. The _copyin/_copyout -| below assume that the SFC/DFC have been set previously. -| -| Linux/68k: These are basically non-inlined versions of -| memcpy_{to,from}fs, but without long-transfer optimization -| Note: Assumed that SFC/DFC are pointing correctly to user data -| space... Should be right, or are there any exceptions? - -| -| int _copyout(supervisor_addr, user_addr, nbytes) -| - .global _copyout -_copyout: - move.l 4(%sp),%a0 | source - move.l 8(%sp),%a1 | destination - move.l 12(%sp),%d0 | count - subq.l #1,%d0 -moreout: - move.b (%a0)+,%d1 | fetch supervisor byte -copyoutae: - movs.b %d1,(%a1)+ | store user byte - dbra %d0,moreout | are we through yet? - moveq #0,%d0 | return success - rts - -| -| int _copyin(user_addr, supervisor_addr, nbytes) -| - .global _copyin -_copyin: - move.l 4(%sp),%a0 | source - move.l 8(%sp),%a1 | destination - move.l 12(%sp),%d0 | count - subq.l #1,%d0 -morein: -copyinae: - movs.b (%a0)+,%d1 | fetch user byte - move.b %d1,(%a1)+ | write supervisor byte - dbra %d0,morein | are we through yet? - moveq #0,%d0 | return success - rts -#endif - -|########################################################################### - -| -| _060_real_trace(): -| -| This is the exit point for the 060FPSP when an instruction is being traced -| and there are no other higher priority exceptions pending for this instruction -| or they have already been processed. -| -| The sample code below simply executes an "rte". -| - .global _060_real_trace -_060_real_trace: - bral trap - -| -| _060_real_access(): -| -| This is the exit point for the 060FPSP when an access error exception -| is encountered. The routine below should point to the operating system -| handler for access error exceptions. The exception stack frame is an -| 8-word access error frame. -| -| The sample routine below simply executes an "rte" instruction which -| is most likely the incorrect thing to do and could put the system -| into an infinite loop. 
-| - .global _060_real_access -_060_real_access: - bral buserr - - - -| Execption handling for movs access to illegal memory - .section .fixup,#alloc,#execinstr - .even -1: moveq #-1,%d1 - rts -.section __ex_table,#alloc - .align 4 - .long dmrbuae,1b - .long dmrwuae,1b - .long dmrluae,1b - .long dmwbuae,1b - .long dmwwuae,1b - .long dmwluae,1b - .long copyoutae,1b - .long copyinae,1b - .text diff --git a/arch/m68k/ifpsp060/src/fplsp.S b/arch/m68k/ifpsp060/src/fplsp.S deleted file mode 100644 index 3b7ea2dc9f1bbf098fe6cf03fdffeb1bb934e5ab..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/src/fplsp.S +++ /dev/null @@ -1,10980 +0,0 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -M68000 Hi-Performance Microprocessor Division -M68060 Software Package -Production Release P1.00 -- October 10, 1994 - -M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. - -THE SOFTWARE is provided on an "AS IS" basis and without warranty. -To the maximum extent permitted by applicable law, -MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -and any warranty against infringement with regard to the SOFTWARE -(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. - -To the maximum extent permitted by applicable law, -IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. - -You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -so long as this entire notice is retained without alteration in any modified and/or -redistributed versions, and that such modified versions are clearly identified as such. -No licenses are granted by implication, estoppel or otherwise under any patents -or trademarks of Motorola, Inc. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# lfptop.s: -# This file is appended to the top of the 060ILSP package -# and contains the entry points into the package. The user, in -# effect, branches to one of the branch table entries located here. 
-# - - bra.l _facoss_ - short 0x0000 - bra.l _facosd_ - short 0x0000 - bra.l _facosx_ - short 0x0000 - - bra.l _fasins_ - short 0x0000 - bra.l _fasind_ - short 0x0000 - bra.l _fasinx_ - short 0x0000 - - bra.l _fatans_ - short 0x0000 - bra.l _fatand_ - short 0x0000 - bra.l _fatanx_ - short 0x0000 - - bra.l _fatanhs_ - short 0x0000 - bra.l _fatanhd_ - short 0x0000 - bra.l _fatanhx_ - short 0x0000 - - bra.l _fcoss_ - short 0x0000 - bra.l _fcosd_ - short 0x0000 - bra.l _fcosx_ - short 0x0000 - - bra.l _fcoshs_ - short 0x0000 - bra.l _fcoshd_ - short 0x0000 - bra.l _fcoshx_ - short 0x0000 - - bra.l _fetoxs_ - short 0x0000 - bra.l _fetoxd_ - short 0x0000 - bra.l _fetoxx_ - short 0x0000 - - bra.l _fetoxm1s_ - short 0x0000 - bra.l _fetoxm1d_ - short 0x0000 - bra.l _fetoxm1x_ - short 0x0000 - - bra.l _fgetexps_ - short 0x0000 - bra.l _fgetexpd_ - short 0x0000 - bra.l _fgetexpx_ - short 0x0000 - - bra.l _fgetmans_ - short 0x0000 - bra.l _fgetmand_ - short 0x0000 - bra.l _fgetmanx_ - short 0x0000 - - bra.l _flog10s_ - short 0x0000 - bra.l _flog10d_ - short 0x0000 - bra.l _flog10x_ - short 0x0000 - - bra.l _flog2s_ - short 0x0000 - bra.l _flog2d_ - short 0x0000 - bra.l _flog2x_ - short 0x0000 - - bra.l _flogns_ - short 0x0000 - bra.l _flognd_ - short 0x0000 - bra.l _flognx_ - short 0x0000 - - bra.l _flognp1s_ - short 0x0000 - bra.l _flognp1d_ - short 0x0000 - bra.l _flognp1x_ - short 0x0000 - - bra.l _fmods_ - short 0x0000 - bra.l _fmodd_ - short 0x0000 - bra.l _fmodx_ - short 0x0000 - - bra.l _frems_ - short 0x0000 - bra.l _fremd_ - short 0x0000 - bra.l _fremx_ - short 0x0000 - - bra.l _fscales_ - short 0x0000 - bra.l _fscaled_ - short 0x0000 - bra.l _fscalex_ - short 0x0000 - - bra.l _fsins_ - short 0x0000 - bra.l _fsind_ - short 0x0000 - bra.l _fsinx_ - short 0x0000 - - bra.l _fsincoss_ - short 0x0000 - bra.l _fsincosd_ - short 0x0000 - bra.l _fsincosx_ - short 0x0000 - - bra.l _fsinhs_ - short 0x0000 - bra.l _fsinhd_ - short 0x0000 - bra.l _fsinhx_ - short 0x0000 - - bra.l _ftans_ - short 0x0000 - bra.l _ftand_ - short 0x0000 - bra.l _ftanx_ - short 0x0000 - - bra.l _ftanhs_ - short 0x0000 - bra.l _ftanhd_ - short 0x0000 - bra.l _ftanhx_ - short 0x0000 - - bra.l _ftentoxs_ - short 0x0000 - bra.l _ftentoxd_ - short 0x0000 - bra.l _ftentoxx_ - short 0x0000 - - bra.l _ftwotoxs_ - short 0x0000 - bra.l _ftwotoxd_ - short 0x0000 - bra.l _ftwotoxx_ - short 0x0000 - - bra.l _fabss_ - short 0x0000 - bra.l _fabsd_ - short 0x0000 - bra.l _fabsx_ - short 0x0000 - - bra.l _fadds_ - short 0x0000 - bra.l _faddd_ - short 0x0000 - bra.l _faddx_ - short 0x0000 - - bra.l _fdivs_ - short 0x0000 - bra.l _fdivd_ - short 0x0000 - bra.l _fdivx_ - short 0x0000 - - bra.l _fints_ - short 0x0000 - bra.l _fintd_ - short 0x0000 - bra.l _fintx_ - short 0x0000 - - bra.l _fintrzs_ - short 0x0000 - bra.l _fintrzd_ - short 0x0000 - bra.l _fintrzx_ - short 0x0000 - - bra.l _fmuls_ - short 0x0000 - bra.l _fmuld_ - short 0x0000 - bra.l _fmulx_ - short 0x0000 - - bra.l _fnegs_ - short 0x0000 - bra.l _fnegd_ - short 0x0000 - bra.l _fnegx_ - short 0x0000 - - bra.l _fsqrts_ - short 0x0000 - bra.l _fsqrtd_ - short 0x0000 - bra.l _fsqrtx_ - short 0x0000 - - bra.l _fsubs_ - short 0x0000 - bra.l _fsubd_ - short 0x0000 - bra.l _fsubx_ - short 0x0000 - -# leave room for future possible additions - align 0x400 - -# -# This file contains a set of define statements for constants -# in order to promote readability within the corecode itself. 
-# - -set LOCAL_SIZE, 192 # stack frame size(bytes) -set LV, -LOCAL_SIZE # stack offset - -set EXC_SR, 0x4 # stack status register -set EXC_PC, 0x6 # stack pc -set EXC_VOFF, 0xa # stacked vector offset -set EXC_EA, 0xc # stacked - -set EXC_FP, 0x0 # frame pointer - -set EXC_AREGS, -68 # offset of all address regs -set EXC_DREGS, -100 # offset of all data regs -set EXC_FPREGS, -36 # offset of all fp regs - -set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 -set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 -set EXC_A5, EXC_AREGS+(5*4) -set EXC_A4, EXC_AREGS+(4*4) -set EXC_A3, EXC_AREGS+(3*4) -set EXC_A2, EXC_AREGS+(2*4) -set EXC_A1, EXC_AREGS+(1*4) -set EXC_A0, EXC_AREGS+(0*4) -set EXC_D7, EXC_DREGS+(7*4) -set EXC_D6, EXC_DREGS+(6*4) -set EXC_D5, EXC_DREGS+(5*4) -set EXC_D4, EXC_DREGS+(4*4) -set EXC_D3, EXC_DREGS+(3*4) -set EXC_D2, EXC_DREGS+(2*4) -set EXC_D1, EXC_DREGS+(1*4) -set EXC_D0, EXC_DREGS+(0*4) - -set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 -set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 -set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) - -set FP_SCR1, LV+80 # fp scratch 1 -set FP_SCR1_EX, FP_SCR1+0 -set FP_SCR1_SGN, FP_SCR1+2 -set FP_SCR1_HI, FP_SCR1+4 -set FP_SCR1_LO, FP_SCR1+8 - -set FP_SCR0, LV+68 # fp scratch 0 -set FP_SCR0_EX, FP_SCR0+0 -set FP_SCR0_SGN, FP_SCR0+2 -set FP_SCR0_HI, FP_SCR0+4 -set FP_SCR0_LO, FP_SCR0+8 - -set FP_DST, LV+56 # fp destination operand -set FP_DST_EX, FP_DST+0 -set FP_DST_SGN, FP_DST+2 -set FP_DST_HI, FP_DST+4 -set FP_DST_LO, FP_DST+8 - -set FP_SRC, LV+44 # fp source operand -set FP_SRC_EX, FP_SRC+0 -set FP_SRC_SGN, FP_SRC+2 -set FP_SRC_HI, FP_SRC+4 -set FP_SRC_LO, FP_SRC+8 - -set USER_FPIAR, LV+40 # FP instr address register - -set USER_FPSR, LV+36 # FP status register -set FPSR_CC, USER_FPSR+0 # FPSR condition codes -set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte -set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte -set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte - -set USER_FPCR, LV+32 # FP control register -set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable -set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control - -set L_SCR3, LV+28 # integer scratch 3 -set L_SCR2, LV+24 # integer scratch 2 -set L_SCR1, LV+20 # integer scratch 1 - -set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) - -set EXC_TEMP2, LV+24 # temporary space -set EXC_TEMP, LV+16 # temporary space - -set DTAG, LV+15 # destination operand type -set STAG, LV+14 # source operand type - -set SPCOND_FLG, LV+10 # flag: special case (see below) - -set EXC_CC, LV+8 # saved condition codes -set EXC_EXTWPTR, LV+4 # saved current PC (active) -set EXC_EXTWORD, LV+2 # saved extension word -set EXC_CMDREG, LV+2 # saved extension word -set EXC_OPWORD, LV+0 # saved operation word - -################################ - -# Helpful macros - -set FTEMP, 0 # offsets within an -set FTEMP_EX, 0 # extended precision -set FTEMP_SGN, 2 # value saved in memory. -set FTEMP_HI, 4 -set FTEMP_LO, 8 -set FTEMP_GRS, 12 - -set LOCAL, 0 # offsets within an -set LOCAL_EX, 0 # extended precision -set LOCAL_SGN, 2 # value saved in memory. -set LOCAL_HI, 4 -set LOCAL_LO, 8 -set LOCAL_GRS, 12 - -set DST, 0 # offsets within an -set DST_EX, 0 # extended precision -set DST_HI, 4 # value saved in memory. -set DST_LO, 8 - -set SRC, 0 # offsets within an -set SRC_EX, 0 # extended precision -set SRC_HI, 4 # value saved in memory. 
-set SRC_LO, 8 - -set SGL_LO, 0x3f81 # min sgl prec exponent -set SGL_HI, 0x407e # max sgl prec exponent -set DBL_LO, 0x3c01 # min dbl prec exponent -set DBL_HI, 0x43fe # max dbl prec exponent -set EXT_LO, 0x0 # min ext prec exponent -set EXT_HI, 0x7ffe # max ext prec exponent - -set EXT_BIAS, 0x3fff # extended precision bias -set SGL_BIAS, 0x007f # single precision bias -set DBL_BIAS, 0x03ff # double precision bias - -set NORM, 0x00 # operand type for STAG/DTAG -set ZERO, 0x01 # operand type for STAG/DTAG -set INF, 0x02 # operand type for STAG/DTAG -set QNAN, 0x03 # operand type for STAG/DTAG -set DENORM, 0x04 # operand type for STAG/DTAG -set SNAN, 0x05 # operand type for STAG/DTAG -set UNNORM, 0x06 # operand type for STAG/DTAG - -################## -# FPSR/FPCR bits # -################## -set neg_bit, 0x3 # negative result -set z_bit, 0x2 # zero result -set inf_bit, 0x1 # infinite result -set nan_bit, 0x0 # NAN result - -set q_sn_bit, 0x7 # sign bit of quotient byte - -set bsun_bit, 7 # branch on unordered -set snan_bit, 6 # signalling NAN -set operr_bit, 5 # operand error -set ovfl_bit, 4 # overflow -set unfl_bit, 3 # underflow -set dz_bit, 2 # divide by zero -set inex2_bit, 1 # inexact result 2 -set inex1_bit, 0 # inexact result 1 - -set aiop_bit, 7 # accrued inexact operation bit -set aovfl_bit, 6 # accrued overflow bit -set aunfl_bit, 5 # accrued underflow bit -set adz_bit, 4 # accrued dz bit -set ainex_bit, 3 # accrued inexact bit - -############################# -# FPSR individual bit masks # -############################# -set neg_mask, 0x08000000 # negative bit mask (lw) -set inf_mask, 0x02000000 # infinity bit mask (lw) -set z_mask, 0x04000000 # zero bit mask (lw) -set nan_mask, 0x01000000 # nan bit mask (lw) - -set neg_bmask, 0x08 # negative bit mask (byte) -set inf_bmask, 0x02 # infinity bit mask (byte) -set z_bmask, 0x04 # zero bit mask (byte) -set nan_bmask, 0x01 # nan bit mask (byte) - -set bsun_mask, 0x00008000 # bsun exception mask -set snan_mask, 0x00004000 # snan exception mask -set operr_mask, 0x00002000 # operr exception mask -set ovfl_mask, 0x00001000 # overflow exception mask -set unfl_mask, 0x00000800 # underflow exception mask -set dz_mask, 0x00000400 # dz exception mask -set inex2_mask, 0x00000200 # inex2 exception mask -set inex1_mask, 0x00000100 # inex1 exception mask - -set aiop_mask, 0x00000080 # accrued illegal operation -set aovfl_mask, 0x00000040 # accrued overflow -set aunfl_mask, 0x00000020 # accrued underflow -set adz_mask, 0x00000010 # accrued divide by zero -set ainex_mask, 0x00000008 # accrued inexact - -###################################### -# FPSR combinations used in the FPSP # -###################################### -set dzinf_mask, inf_mask+dz_mask+adz_mask -set opnan_mask, nan_mask+operr_mask+aiop_mask -set nzi_mask, 0x01ffffff #clears N, Z, and I -set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask -set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask -set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask -set inx1a_mask, inex1_mask+ainex_mask -set inx2a_mask, inex2_mask+ainex_mask -set snaniop_mask, nan_mask+snan_mask+aiop_mask -set snaniop2_mask, snan_mask+aiop_mask -set naniop_mask, nan_mask+aiop_mask -set neginf_mask, neg_mask+inf_mask -set infaiop_mask, inf_mask+aiop_mask -set negz_mask, neg_mask+z_mask -set opaop_mask, operr_mask+aiop_mask -set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask -set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask - -######### -# misc. 
# -######### -set rnd_stky_bit, 29 # stky bit pos in longword - -set sign_bit, 0x7 # sign bit -set signan_bit, 0x6 # signalling nan bit - -set sgl_thresh, 0x3f81 # minimum sgl exponent -set dbl_thresh, 0x3c01 # minimum dbl exponent - -set x_mode, 0x0 # extended precision -set s_mode, 0x4 # single precision -set d_mode, 0x8 # double precision - -set rn_mode, 0x0 # round-to-nearest -set rz_mode, 0x1 # round-to-zero -set rm_mode, 0x2 # round-tp-minus-infinity -set rp_mode, 0x3 # round-to-plus-infinity - -set mantissalen, 64 # length of mantissa in bits - -set BYTE, 1 # len(byte) == 1 byte -set WORD, 2 # len(word) == 2 bytes -set LONG, 4 # len(longword) == 2 bytes - -set BSUN_VEC, 0xc0 # bsun vector offset -set INEX_VEC, 0xc4 # inexact vector offset -set DZ_VEC, 0xc8 # dz vector offset -set UNFL_VEC, 0xcc # unfl vector offset -set OPERR_VEC, 0xd0 # operr vector offset -set OVFL_VEC, 0xd4 # ovfl vector offset -set SNAN_VEC, 0xd8 # snan vector offset - -########################### -# SPecial CONDition FLaGs # -########################### -set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception -set fbsun_flg, 0x02 # flag bit: bsun exception -set mia7_flg, 0x04 # flag bit: (a7)+ -set mda7_flg, 0x08 # flag bit: -(a7) -set fmovm_flg, 0x40 # flag bit: fmovm instruction -set immed_flg, 0x80 # flag bit: & - -set ftrapcc_bit, 0x0 -set fbsun_bit, 0x1 -set mia7_bit, 0x2 -set mda7_bit, 0x3 -set immed_bit, 0x7 - -################################## -# TRANSCENDENTAL "LAST-OP" FLAGS # -################################## -set FMUL_OP, 0x0 # fmul instr performed last -set FDIV_OP, 0x1 # fdiv performed last -set FADD_OP, 0x2 # fadd performed last -set FMOV_OP, 0x3 # fmov performed last - -############# -# CONSTANTS # -############# -T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD -T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL - -PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 -PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 - -TWOBYPI: - long 0x3FE45F30,0x6DC9C883 - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsins_ -_fsins_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L0_2s - bsr.l ssin # operand is a NORM - bra.b _L0_6s -_L0_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3s # no - bsr.l src_zero # yes - bra.b _L0_6s -_L0_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4s # no - bsr.l t_operr # yes - bra.b _L0_6s -_L0_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L0_5s # no - bsr.l src_qnan # yes - bra.b _L0_6s -_L0_5s: - bsr.l ssind # operand is a DENORM -_L0_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsind_ -_fsind_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L0_2d - bsr.l ssin # operand is a NORM - bra.b _L0_6d -_L0_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3d # no - bsr.l src_zero # yes - bra.b _L0_6d -_L0_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4d # no - bsr.l t_operr # yes - bra.b _L0_6d -_L0_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L0_5d # no - bsr.l src_qnan # yes - bra.b _L0_6d -_L0_5d: - bsr.l ssind # operand is a DENORM -_L0_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinx_ -_fsinx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L0_2x - bsr.l ssin # operand is a NORM - bra.b _L0_6x -_L0_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L0_3x # no - bsr.l src_zero # yes - bra.b _L0_6x -_L0_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L0_4x # no - bsr.l t_operr # yes - bra.b _L0_6x -_L0_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L0_5x # no - bsr.l src_qnan # yes - bra.b _L0_6x -_L0_5x: - bsr.l ssind # operand is a DENORM -_L0_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fcoss_ -_fcoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L1_2s - bsr.l scos # operand is a NORM - bra.b _L1_6s -_L1_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3s # no - bsr.l ld_pone # yes - bra.b _L1_6s -_L1_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4s # no - bsr.l t_operr # yes - bra.b _L1_6s -_L1_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L1_5s # no - bsr.l src_qnan # yes - bra.b _L1_6s -_L1_5s: - bsr.l scosd # operand is a DENORM -_L1_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcosd_ -_fcosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L1_2d - bsr.l scos # operand is a NORM - bra.b _L1_6d -_L1_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3d # no - bsr.l ld_pone # yes - bra.b _L1_6d -_L1_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4d # no - bsr.l t_operr # yes - bra.b _L1_6d -_L1_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L1_5d # no - bsr.l src_qnan # yes - bra.b _L1_6d -_L1_5d: - bsr.l scosd # operand is a DENORM -_L1_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcosx_ -_fcosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L1_2x - bsr.l scos # operand is a NORM - bra.b _L1_6x -_L1_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L1_3x # no - bsr.l ld_pone # yes - bra.b _L1_6x -_L1_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L1_4x # no - bsr.l t_operr # yes - bra.b _L1_6x -_L1_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L1_5x # no - bsr.l src_qnan # yes - bra.b _L1_6x -_L1_5x: - bsr.l scosd # operand is a DENORM -_L1_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsinhs_ -_fsinhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L2_2s - bsr.l ssinh # operand is a NORM - bra.b _L2_6s -_L2_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3s # no - bsr.l src_zero # yes - bra.b _L2_6s -_L2_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4s # no - bsr.l src_inf # yes - bra.b _L2_6s -_L2_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L2_5s # no - bsr.l src_qnan # yes - bra.b _L2_6s -_L2_5s: - bsr.l ssinhd # operand is a DENORM -_L2_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinhd_ -_fsinhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L2_2d - bsr.l ssinh # operand is a NORM - bra.b _L2_6d -_L2_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3d # no - bsr.l src_zero # yes - bra.b _L2_6d -_L2_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4d # no - bsr.l src_inf # yes - bra.b _L2_6d -_L2_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L2_5d # no - bsr.l src_qnan # yes - bra.b _L2_6d -_L2_5d: - bsr.l ssinhd # operand is a DENORM -_L2_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fsinhx_ -_fsinhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L2_2x - bsr.l ssinh # operand is a NORM - bra.b _L2_6x -_L2_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L2_3x # no - bsr.l src_zero # yes - bra.b _L2_6x -_L2_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L2_4x # no - bsr.l src_inf # yes - bra.b _L2_6x -_L2_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L2_5x # no - bsr.l src_qnan # yes - bra.b _L2_6x -_L2_5x: - bsr.l ssinhd # operand is a DENORM -_L2_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flognp1s_ -_flognp1s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L3_2s - bsr.l slognp1 # operand is a NORM - bra.b _L3_6s -_L3_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3s # no - bsr.l src_zero # yes - bra.b _L3_6s -_L3_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4s # no - bsr.l sopr_inf # yes - bra.b _L3_6s -_L3_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L3_5s # no - bsr.l src_qnan # yes - bra.b _L3_6s -_L3_5s: - bsr.l slognp1d # operand is a DENORM -_L3_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognp1d_ -_flognp1d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L3_2d - bsr.l slognp1 # operand is a NORM - bra.b _L3_6d -_L3_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3d # no - bsr.l src_zero # yes - bra.b _L3_6d -_L3_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4d # no - bsr.l sopr_inf # yes - bra.b _L3_6d -_L3_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L3_5d # no - bsr.l src_qnan # yes - bra.b _L3_6d -_L3_5d: - bsr.l slognp1d # operand is a DENORM -_L3_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognp1x_ -_flognp1x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L3_2x - bsr.l slognp1 # operand is a NORM - bra.b _L3_6x -_L3_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L3_3x # no - bsr.l src_zero # yes - bra.b _L3_6x -_L3_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L3_4x # no - bsr.l sopr_inf # yes - bra.b _L3_6x -_L3_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L3_5x # no - bsr.l src_qnan # yes - bra.b _L3_6x -_L3_5x: - bsr.l slognp1d # operand is a DENORM -_L3_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fetoxm1s_ -_fetoxm1s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L4_2s - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6s -_L4_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3s # no - bsr.l src_zero # yes - bra.b _L4_6s -_L4_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4s # no - bsr.l setoxm1i # yes - bra.b _L4_6s -_L4_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L4_5s # no - bsr.l src_qnan # yes - bra.b _L4_6s -_L4_5s: - bsr.l setoxm1d # operand is a DENORM -_L4_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxm1d_ -_fetoxm1d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L4_2d - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6d -_L4_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3d # no - bsr.l src_zero # yes - bra.b _L4_6d -_L4_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4d # no - bsr.l setoxm1i # yes - bra.b _L4_6d -_L4_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L4_5d # no - bsr.l src_qnan # yes - bra.b _L4_6d -_L4_5d: - bsr.l setoxm1d # operand is a DENORM -_L4_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxm1x_ -_fetoxm1x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L4_2x - bsr.l setoxm1 # operand is a NORM - bra.b _L4_6x -_L4_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L4_3x # no - bsr.l src_zero # yes - bra.b _L4_6x -_L4_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L4_4x # no - bsr.l setoxm1i # yes - bra.b _L4_6x -_L4_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L4_5x # no - bsr.l src_qnan # yes - bra.b _L4_6x -_L4_5x: - bsr.l setoxm1d # operand is a DENORM -_L4_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftanhs_ -_ftanhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L5_2s - bsr.l stanh # operand is a NORM - bra.b _L5_6s -_L5_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3s # no - bsr.l src_zero # yes - bra.b _L5_6s -_L5_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4s # no - bsr.l src_one # yes - bra.b _L5_6s -_L5_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L5_5s # no - bsr.l src_qnan # yes - bra.b _L5_6s -_L5_5s: - bsr.l stanhd # operand is a DENORM -_L5_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanhd_ -_ftanhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L5_2d - bsr.l stanh # operand is a NORM - bra.b _L5_6d -_L5_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3d # no - bsr.l src_zero # yes - bra.b _L5_6d -_L5_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4d # no - bsr.l src_one # yes - bra.b _L5_6d -_L5_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L5_5d # no - bsr.l src_qnan # yes - bra.b _L5_6d -_L5_5d: - bsr.l stanhd # operand is a DENORM -_L5_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanhx_ -_ftanhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L5_2x - bsr.l stanh # operand is a NORM - bra.b _L5_6x -_L5_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L5_3x # no - bsr.l src_zero # yes - bra.b _L5_6x -_L5_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L5_4x # no - bsr.l src_one # yes - bra.b _L5_6x -_L5_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L5_5x # no - bsr.l src_qnan # yes - bra.b _L5_6x -_L5_5x: - bsr.l stanhd # operand is a DENORM -_L5_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fatans_ -_fatans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L6_2s - bsr.l satan # operand is a NORM - bra.b _L6_6s -_L6_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3s # no - bsr.l src_zero # yes - bra.b _L6_6s -_L6_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4s # no - bsr.l spi_2 # yes - bra.b _L6_6s -_L6_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L6_5s # no - bsr.l src_qnan # yes - bra.b _L6_6s -_L6_5s: - bsr.l satand # operand is a DENORM -_L6_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatand_ -_fatand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L6_2d - bsr.l satan # operand is a NORM - bra.b _L6_6d -_L6_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3d # no - bsr.l src_zero # yes - bra.b _L6_6d -_L6_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4d # no - bsr.l spi_2 # yes - bra.b _L6_6d -_L6_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L6_5d # no - bsr.l src_qnan # yes - bra.b _L6_6d -_L6_5d: - bsr.l satand # operand is a DENORM -_L6_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanx_ -_fatanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L6_2x - bsr.l satan # operand is a NORM - bra.b _L6_6x -_L6_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L6_3x # no - bsr.l src_zero # yes - bra.b _L6_6x -_L6_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L6_4x # no - bsr.l spi_2 # yes - bra.b _L6_6x -_L6_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L6_5x # no - bsr.l src_qnan # yes - bra.b _L6_6x -_L6_5x: - bsr.l satand # operand is a DENORM -_L6_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fasins_ -_fasins_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L7_2s - bsr.l sasin # operand is a NORM - bra.b _L7_6s -_L7_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3s # no - bsr.l src_zero # yes - bra.b _L7_6s -_L7_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4s # no - bsr.l t_operr # yes - bra.b _L7_6s -_L7_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L7_5s # no - bsr.l src_qnan # yes - bra.b _L7_6s -_L7_5s: - bsr.l sasind # operand is a DENORM -_L7_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fasind_ -_fasind_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L7_2d - bsr.l sasin # operand is a NORM - bra.b _L7_6d -_L7_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3d # no - bsr.l src_zero # yes - bra.b _L7_6d -_L7_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4d # no - bsr.l t_operr # yes - bra.b _L7_6d -_L7_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L7_5d # no - bsr.l src_qnan # yes - bra.b _L7_6d -_L7_5d: - bsr.l sasind # operand is a DENORM -_L7_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fasinx_ -_fasinx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L7_2x - bsr.l sasin # operand is a NORM - bra.b _L7_6x -_L7_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L7_3x # no - bsr.l src_zero # yes - bra.b _L7_6x -_L7_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L7_4x # no - bsr.l t_operr # yes - bra.b _L7_6x -_L7_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L7_5x # no - bsr.l src_qnan # yes - bra.b _L7_6x -_L7_5x: - bsr.l sasind # operand is a DENORM -_L7_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fatanhs_ -_fatanhs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L8_2s - bsr.l satanh # operand is a NORM - bra.b _L8_6s -_L8_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3s # no - bsr.l src_zero # yes - bra.b _L8_6s -_L8_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4s # no - bsr.l t_operr # yes - bra.b _L8_6s -_L8_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L8_5s # no - bsr.l src_qnan # yes - bra.b _L8_6s -_L8_5s: - bsr.l satanhd # operand is a DENORM -_L8_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanhd_ -_fatanhd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L8_2d - bsr.l satanh # operand is a NORM - bra.b _L8_6d -_L8_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3d # no - bsr.l src_zero # yes - bra.b _L8_6d -_L8_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4d # no - bsr.l t_operr # yes - bra.b _L8_6d -_L8_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L8_5d # no - bsr.l src_qnan # yes - bra.b _L8_6d -_L8_5d: - bsr.l satanhd # operand is a DENORM -_L8_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fatanhx_ -_fatanhx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L8_2x - bsr.l satanh # operand is a NORM - bra.b _L8_6x -_L8_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L8_3x # no - bsr.l src_zero # yes - bra.b _L8_6x -_L8_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L8_4x # no - bsr.l t_operr # yes - bra.b _L8_6x -_L8_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L8_5x # no - bsr.l src_qnan # yes - bra.b _L8_6x -_L8_5x: - bsr.l satanhd # operand is a DENORM -_L8_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftans_ -_ftans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L9_2s - bsr.l stan # operand is a NORM - bra.b _L9_6s -_L9_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3s # no - bsr.l src_zero # yes - bra.b _L9_6s -_L9_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4s # no - bsr.l t_operr # yes - bra.b _L9_6s -_L9_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L9_5s # no - bsr.l src_qnan # yes - bra.b _L9_6s -_L9_5s: - bsr.l stand # operand is a DENORM -_L9_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftand_ -_ftand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L9_2d - bsr.l stan # operand is a NORM - bra.b _L9_6d -_L9_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3d # no - bsr.l src_zero # yes - bra.b _L9_6d -_L9_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4d # no - bsr.l t_operr # yes - bra.b _L9_6d -_L9_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L9_5d # no - bsr.l src_qnan # yes - bra.b _L9_6d -_L9_5d: - bsr.l stand # operand is a DENORM -_L9_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftanx_ -_ftanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L9_2x - bsr.l stan # operand is a NORM - bra.b _L9_6x -_L9_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L9_3x # no - bsr.l src_zero # yes - bra.b _L9_6x -_L9_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L9_4x # no - bsr.l t_operr # yes - bra.b _L9_6x -_L9_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L9_5x # no - bsr.l src_qnan # yes - bra.b _L9_6x -_L9_5x: - bsr.l stand # operand is a DENORM -_L9_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fetoxs_ -_fetoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L10_2s - bsr.l setox # operand is a NORM - bra.b _L10_6s -_L10_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3s # no - bsr.l ld_pone # yes - bra.b _L10_6s -_L10_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4s # no - bsr.l szr_inf # yes - bra.b _L10_6s -_L10_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L10_5s # no - bsr.l src_qnan # yes - bra.b _L10_6s -_L10_5s: - bsr.l setoxd # operand is a DENORM -_L10_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxd_ -_fetoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L10_2d - bsr.l setox # operand is a NORM - bra.b _L10_6d -_L10_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3d # no - bsr.l ld_pone # yes - bra.b _L10_6d -_L10_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4d # no - bsr.l szr_inf # yes - bra.b _L10_6d -_L10_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L10_5d # no - bsr.l src_qnan # yes - bra.b _L10_6d -_L10_5d: - bsr.l setoxd # operand is a DENORM -_L10_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fetoxx_ -_fetoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L10_2x - bsr.l setox # operand is a NORM - bra.b _L10_6x -_L10_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L10_3x # no - bsr.l ld_pone # yes - bra.b _L10_6x -_L10_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L10_4x # no - bsr.l szr_inf # yes - bra.b _L10_6x -_L10_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L10_5x # no - bsr.l src_qnan # yes - bra.b _L10_6x -_L10_5x: - bsr.l setoxd # operand is a DENORM -_L10_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftwotoxs_ -_ftwotoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L11_2s - bsr.l stwotox # operand is a NORM - bra.b _L11_6s -_L11_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3s # no - bsr.l ld_pone # yes - bra.b _L11_6s -_L11_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4s # no - bsr.l szr_inf # yes - bra.b _L11_6s -_L11_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L11_5s # no - bsr.l src_qnan # yes - bra.b _L11_6s -_L11_5s: - bsr.l stwotoxd # operand is a DENORM -_L11_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftwotoxd_ -_ftwotoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L11_2d - bsr.l stwotox # operand is a NORM - bra.b _L11_6d -_L11_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3d # no - bsr.l ld_pone # yes - bra.b _L11_6d -_L11_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4d # no - bsr.l szr_inf # yes - bra.b _L11_6d -_L11_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L11_5d # no - bsr.l src_qnan # yes - bra.b _L11_6d -_L11_5d: - bsr.l stwotoxd # operand is a DENORM -_L11_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftwotoxx_ -_ftwotoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L11_2x - bsr.l stwotox # operand is a NORM - bra.b _L11_6x -_L11_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L11_3x # no - bsr.l ld_pone # yes - bra.b _L11_6x -_L11_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L11_4x # no - bsr.l szr_inf # yes - bra.b _L11_6x -_L11_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L11_5x # no - bsr.l src_qnan # yes - bra.b _L11_6x -_L11_5x: - bsr.l stwotoxd # operand is a DENORM -_L11_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _ftentoxs_ -_ftentoxs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L12_2s - bsr.l stentox # operand is a NORM - bra.b _L12_6s -_L12_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3s # no - bsr.l ld_pone # yes - bra.b _L12_6s -_L12_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4s # no - bsr.l szr_inf # yes - bra.b _L12_6s -_L12_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L12_5s # no - bsr.l src_qnan # yes - bra.b _L12_6s -_L12_5s: - bsr.l stentoxd # operand is a DENORM -_L12_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftentoxd_ -_ftentoxd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L12_2d - bsr.l stentox # operand is a NORM - bra.b _L12_6d -_L12_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3d # no - bsr.l ld_pone # yes - bra.b _L12_6d -_L12_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4d # no - bsr.l szr_inf # yes - bra.b _L12_6d -_L12_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L12_5d # no - bsr.l src_qnan # yes - bra.b _L12_6d -_L12_5d: - bsr.l stentoxd # operand is a DENORM -_L12_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _ftentoxx_ -_ftentoxx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L12_2x - bsr.l stentox # operand is a NORM - bra.b _L12_6x -_L12_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L12_3x # no - bsr.l ld_pone # yes - bra.b _L12_6x -_L12_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L12_4x # no - bsr.l szr_inf # yes - bra.b _L12_6x -_L12_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L12_5x # no - bsr.l src_qnan # yes - bra.b _L12_6x -_L12_5x: - bsr.l stentoxd # operand is a DENORM -_L12_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flogns_ -_flogns_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L13_2s - bsr.l slogn # operand is a NORM - bra.b _L13_6s -_L13_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3s # no - bsr.l t_dz2 # yes - bra.b _L13_6s -_L13_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4s # no - bsr.l sopr_inf # yes - bra.b _L13_6s -_L13_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L13_5s # no - bsr.l src_qnan # yes - bra.b _L13_6s -_L13_5s: - bsr.l slognd # operand is a DENORM -_L13_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognd_ -_flognd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L13_2d - bsr.l slogn # operand is a NORM - bra.b _L13_6d -_L13_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3d # no - bsr.l t_dz2 # yes - bra.b _L13_6d -_L13_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4d # no - bsr.l sopr_inf # yes - bra.b _L13_6d -_L13_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L13_5d # no - bsr.l src_qnan # yes - bra.b _L13_6d -_L13_5d: - bsr.l slognd # operand is a DENORM -_L13_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flognx_ -_flognx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L13_2x - bsr.l slogn # operand is a NORM - bra.b _L13_6x -_L13_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L13_3x # no - bsr.l t_dz2 # yes - bra.b _L13_6x -_L13_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L13_4x # no - bsr.l sopr_inf # yes - bra.b _L13_6x -_L13_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L13_5x # no - bsr.l src_qnan # yes - bra.b _L13_6x -_L13_5x: - bsr.l slognd # operand is a DENORM -_L13_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flog10s_ -_flog10s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L14_2s - bsr.l slog10 # operand is a NORM - bra.b _L14_6s -_L14_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3s # no - bsr.l t_dz2 # yes - bra.b _L14_6s -_L14_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4s # no - bsr.l sopr_inf # yes - bra.b _L14_6s -_L14_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L14_5s # no - bsr.l src_qnan # yes - bra.b _L14_6s -_L14_5s: - bsr.l slog10d # operand is a DENORM -_L14_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog10d_ -_flog10d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L14_2d - bsr.l slog10 # operand is a NORM - bra.b _L14_6d -_L14_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3d # no - bsr.l t_dz2 # yes - bra.b _L14_6d -_L14_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4d # no - bsr.l sopr_inf # yes - bra.b _L14_6d -_L14_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L14_5d # no - bsr.l src_qnan # yes - bra.b _L14_6d -_L14_5d: - bsr.l slog10d # operand is a DENORM -_L14_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog10x_ -_flog10x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L14_2x - bsr.l slog10 # operand is a NORM - bra.b _L14_6x -_L14_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L14_3x # no - bsr.l t_dz2 # yes - bra.b _L14_6x -_L14_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L14_4x # no - bsr.l sopr_inf # yes - bra.b _L14_6x -_L14_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L14_5x # no - bsr.l src_qnan # yes - bra.b _L14_6x -_L14_5x: - bsr.l slog10d # operand is a DENORM -_L14_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _flog2s_ -_flog2s_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L15_2s - bsr.l slog2 # operand is a NORM - bra.b _L15_6s -_L15_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3s # no - bsr.l t_dz2 # yes - bra.b _L15_6s -_L15_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4s # no - bsr.l sopr_inf # yes - bra.b _L15_6s -_L15_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L15_5s # no - bsr.l src_qnan # yes - bra.b _L15_6s -_L15_5s: - bsr.l slog2d # operand is a DENORM -_L15_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog2d_ -_flog2d_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L15_2d - bsr.l slog2 # operand is a NORM - bra.b _L15_6d -_L15_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3d # no - bsr.l t_dz2 # yes - bra.b _L15_6d -_L15_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4d # no - bsr.l sopr_inf # yes - bra.b _L15_6d -_L15_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L15_5d # no - bsr.l src_qnan # yes - bra.b _L15_6d -_L15_5d: - bsr.l slog2d # operand is a DENORM -_L15_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _flog2x_ -_flog2x_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L15_2x - bsr.l slog2 # operand is a NORM - bra.b _L15_6x -_L15_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L15_3x # no - bsr.l t_dz2 # yes - bra.b _L15_6x -_L15_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L15_4x # no - bsr.l sopr_inf # yes - bra.b _L15_6x -_L15_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L15_5x # no - bsr.l src_qnan # yes - bra.b _L15_6x -_L15_5x: - bsr.l slog2d # operand is a DENORM -_L15_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fcoshs_ -_fcoshs_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L16_2s - bsr.l scosh # operand is a NORM - bra.b _L16_6s -_L16_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3s # no - bsr.l ld_pone # yes - bra.b _L16_6s -_L16_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4s # no - bsr.l ld_pinf # yes - bra.b _L16_6s -_L16_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L16_5s # no - bsr.l src_qnan # yes - bra.b _L16_6s -_L16_5s: - bsr.l scoshd # operand is a DENORM -_L16_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcoshd_ -_fcoshd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L16_2d - bsr.l scosh # operand is a NORM - bra.b _L16_6d -_L16_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3d # no - bsr.l ld_pone # yes - bra.b _L16_6d -_L16_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4d # no - bsr.l ld_pinf # yes - bra.b _L16_6d -_L16_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L16_5d # no - bsr.l src_qnan # yes - bra.b _L16_6d -_L16_5d: - bsr.l scoshd # operand is a DENORM -_L16_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fcoshx_ -_fcoshx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L16_2x - bsr.l scosh # operand is a NORM - bra.b _L16_6x -_L16_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L16_3x # no - bsr.l ld_pone # yes - bra.b _L16_6x -_L16_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L16_4x # no - bsr.l ld_pinf # yes - bra.b _L16_6x -_L16_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L16_5x # no - bsr.l src_qnan # yes - bra.b _L16_6x -_L16_5x: - bsr.l scoshd # operand is a DENORM -_L16_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _facoss_ -_facoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L17_2s - bsr.l sacos # operand is a NORM - bra.b _L17_6s -_L17_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3s # no - bsr.l ld_ppi2 # yes - bra.b _L17_6s -_L17_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4s # no - bsr.l t_operr # yes - bra.b _L17_6s -_L17_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L17_5s # no - bsr.l src_qnan # yes - bra.b _L17_6s -_L17_5s: - bsr.l sacosd # operand is a DENORM -_L17_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _facosd_ -_facosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L17_2d - bsr.l sacos # operand is a NORM - bra.b _L17_6d -_L17_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3d # no - bsr.l ld_ppi2 # yes - bra.b _L17_6d -_L17_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4d # no - bsr.l t_operr # yes - bra.b _L17_6d -_L17_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L17_5d # no - bsr.l src_qnan # yes - bra.b _L17_6d -_L17_5d: - bsr.l sacosd # operand is a DENORM -_L17_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _facosx_ -_facosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L17_2x - bsr.l sacos # operand is a NORM - bra.b _L17_6x -_L17_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L17_3x # no - bsr.l ld_ppi2 # yes - bra.b _L17_6x -_L17_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L17_4x # no - bsr.l t_operr # yes - bra.b _L17_6x -_L17_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L17_5x # no - bsr.l src_qnan # yes - bra.b _L17_6x -_L17_5x: - bsr.l sacosd # operand is a DENORM -_L17_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fgetexps_ -_fgetexps_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L18_2s - bsr.l sgetexp # operand is a NORM - bra.b _L18_6s -_L18_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3s # no - bsr.l src_zero # yes - bra.b _L18_6s -_L18_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4s # no - bsr.l t_operr # yes - bra.b _L18_6s -_L18_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L18_5s # no - bsr.l src_qnan # yes - bra.b _L18_6s -_L18_5s: - bsr.l sgetexpd # operand is a DENORM -_L18_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetexpd_ -_fgetexpd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L18_2d - bsr.l sgetexp # operand is a NORM - bra.b _L18_6d -_L18_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3d # no - bsr.l src_zero # yes - bra.b _L18_6d -_L18_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4d # no - bsr.l t_operr # yes - bra.b _L18_6d -_L18_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L18_5d # no - bsr.l src_qnan # yes - bra.b _L18_6d -_L18_5d: - bsr.l sgetexpd # operand is a DENORM -_L18_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetexpx_ -_fgetexpx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L18_2x - bsr.l sgetexp # operand is a NORM - bra.b _L18_6x -_L18_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L18_3x # no - bsr.l src_zero # yes - bra.b _L18_6x -_L18_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L18_4x # no - bsr.l t_operr # yes - bra.b _L18_6x -_L18_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L18_5x # no - bsr.l src_qnan # yes - bra.b _L18_6x -_L18_5x: - bsr.l sgetexpd # operand is a DENORM -_L18_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fgetmans_ -_fgetmans_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L19_2s - bsr.l sgetman # operand is a NORM - bra.b _L19_6s -_L19_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3s # no - bsr.l src_zero # yes - bra.b _L19_6s -_L19_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4s # no - bsr.l t_operr # yes - bra.b _L19_6s -_L19_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L19_5s # no - bsr.l src_qnan # yes - bra.b _L19_6s -_L19_5s: - bsr.l sgetmand # operand is a DENORM -_L19_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetmand_ -_fgetmand_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L19_2d - bsr.l sgetman # operand is a NORM - bra.b _L19_6d -_L19_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3d # no - bsr.l src_zero # yes - bra.b _L19_6d -_L19_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4d # no - bsr.l t_operr # yes - bra.b _L19_6d -_L19_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L19_5d # no - bsr.l src_qnan # yes - bra.b _L19_6d -_L19_5d: - bsr.l sgetmand # operand is a DENORM -_L19_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fgetmanx_ -_fgetmanx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L19_2x - bsr.l sgetman # operand is a NORM - bra.b _L19_6x -_L19_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L19_3x # no - bsr.l src_zero # yes - bra.b _L19_6x -_L19_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L19_4x # no - bsr.l t_operr # yes - bra.b _L19_6x -_L19_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L19_5x # no - bsr.l src_qnan # yes - bra.b _L19_6x -_L19_5x: - bsr.l sgetmand # operand is a DENORM -_L19_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# MONADIC TEMPLATE # -######################################################################### - global _fsincoss_ -_fsincoss_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L20_2s - bsr.l ssincos # operand is a NORM - bra.b _L20_6s -_L20_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3s # no - bsr.l ssincosz # yes - bra.b _L20_6s -_L20_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4s # no - bsr.l ssincosi # yes - bra.b _L20_6s -_L20_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L20_5s # no - bsr.l ssincosqnan # yes - bra.b _L20_6s -_L20_5s: - bsr.l ssincosd # operand is a DENORM -_L20_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - global _fsincosd_ -_fsincosd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl input - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - mov.b %d1,STAG(%a6) - tst.b %d1 - bne.b _L20_2d - bsr.l ssincos # operand is a NORM - bra.b _L20_6d -_L20_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3d # no - bsr.l ssincosz # yes - bra.b _L20_6d -_L20_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4d # no - bsr.l ssincosi # yes - bra.b _L20_6d -_L20_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L20_5d # no - bsr.l ssincosqnan # yes - bra.b _L20_6d -_L20_5d: - bsr.l ssincosd # operand is a DENORM -_L20_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - global _fsincosx_ -_fsincosx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_SRC(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.b %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - tst.b %d1 - bne.b _L20_2x - bsr.l ssincos # operand is a NORM - bra.b _L20_6x -_L20_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L20_3x # no - bsr.l ssincosz # yes - bra.b _L20_6x -_L20_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L20_4x # no - bsr.l ssincosi # yes - bra.b _L20_6x -_L20_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L20_5x # no - bsr.l ssincosqnan # yes - bra.b _L20_6x -_L20_5x: - bsr.l ssincosd # operand is a DENORM -_L20_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x &0x03,-(%sp) # store off fp0/fp1 - fmovm.x (%sp)+,&0x40 # fp0 now in fp1 - fmovm.x (%sp)+,&0x80 # fp1 now in fp0 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _frems_ -_frems_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2s - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6s -_L21_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3s # no - bsr.l srem_szero # yes - bra.b _L21_6s -_L21_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4s # no - bsr.l srem_sinf # yes - bra.b _L21_6s -_L21_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L21_5s # no - bsr.l sop_sqnan # yes - bra.b _L21_6s -_L21_5s: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fremd_ -_fremd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2d - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6d -_L21_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3d # no - bsr.l srem_szero # yes - bra.b _L21_6d -_L21_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4d # no - bsr.l srem_sinf # yes - bra.b _L21_6d -_L21_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L21_5d # no - bsr.l sop_sqnan # yes - bra.b _L21_6d -_L21_5d: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fremx_ -_fremx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L21_2x - bsr.l srem_snorm # operand is a NORM - bra.b _L21_6x -_L21_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L21_3x # no - bsr.l srem_szero # yes - bra.b _L21_6x -_L21_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L21_4x # no - bsr.l srem_sinf # yes - bra.b _L21_6x -_L21_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L21_5x # no - bsr.l sop_sqnan # yes - bra.b _L21_6x -_L21_5x: - bsr.l srem_sdnrm # operand is a DENORM -_L21_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _fmods_ -_fmods_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2s - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6s -_L22_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3s # no - bsr.l smod_szero # yes - bra.b _L22_6s -_L22_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4s # no - bsr.l smod_sinf # yes - bra.b _L22_6s -_L22_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L22_5s # no - bsr.l sop_sqnan # yes - bra.b _L22_6s -_L22_5s: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fmodd_ -_fmodd_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2d - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6d -_L22_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3d # no - bsr.l smod_szero # yes - bra.b _L22_6d -_L22_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4d # no - bsr.l smod_sinf # yes - bra.b _L22_6d -_L22_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L22_5d # no - bsr.l sop_sqnan # yes - bra.b _L22_6d -_L22_5d: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fmodx_ -_fmodx_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L22_2x - bsr.l smod_snorm # operand is a NORM - bra.b _L22_6x -_L22_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L22_3x # no - bsr.l smod_szero # yes - bra.b _L22_6x -_L22_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L22_4x # no - bsr.l smod_sinf # yes - bra.b _L22_6x -_L22_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L22_5x # no - bsr.l sop_sqnan # yes - bra.b _L22_6x -_L22_5x: - bsr.l smod_sdnrm # operand is a DENORM -_L22_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# DYADIC TEMPLATE # -######################################################################### - global _fscales_ -_fscales_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.s 0x8(%a6),%fp0 # load sgl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.s 0xc(%a6),%fp0 # load sgl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2s - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6s -_L23_2s: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3s # no - bsr.l sscale_szero # yes - bra.b _L23_6s -_L23_3s: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4s # no - bsr.l sscale_sinf # yes - bra.b _L23_6s -_L23_4s: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L23_5s # no - bsr.l sop_sqnan # yes - bra.b _L23_6s -_L23_5s: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6s: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fscaled_ -_fscaled_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - fmov.d 0x8(%a6),%fp0 # load dbl dst - fmov.x %fp0,FP_DST(%a6) - lea FP_DST(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - fmov.d 0x10(%a6),%fp0 # load dbl src - fmov.x %fp0,FP_SRC(%a6) - lea FP_SRC(%a6),%a0 - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2d - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6d -_L23_2d: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3d # no - bsr.l sscale_szero # yes - bra.b _L23_6d -_L23_3d: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4d # no - bsr.l sscale_sinf # yes - bra.b _L23_6d -_L23_4d: - cmpi.b %d1,&QNAN # is operand a QNAN? 
- bne.b _L23_5d # no - bsr.l sop_sqnan # yes - bra.b _L23_6d -_L23_5d: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6d: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - global _fscalex_ -_fscalex_: - link %a6,&-LOCAL_SIZE - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 - - fmov.l &0x0,%fpcr # zero FPCR - -# -# copy, convert, and tag input argument -# - lea FP_DST(%a6),%a0 - mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst - mov.l 0x8+0x4(%a6),0x4(%a0) - mov.l 0x8+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,DTAG(%a6) - - lea FP_SRC(%a6),%a0 - mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src - mov.l 0x14+0x4(%a6),0x4(%a0) - mov.l 0x14+0x8(%a6),0x8(%a0) - bsr.l tag # fetch operand type - mov.b %d0,STAG(%a6) - mov.l %d0,%d1 - - andi.l &0x00ff00ff,USER_FPSR(%a6) - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec - - lea FP_SRC(%a6),%a0 # pass ptr to src - lea FP_DST(%a6),%a1 # pass ptr to dst - - tst.b %d1 - bne.b _L23_2x - bsr.l sscale_snorm # operand is a NORM - bra.b _L23_6x -_L23_2x: - cmpi.b %d1,&ZERO # is operand a ZERO? - bne.b _L23_3x # no - bsr.l sscale_szero # yes - bra.b _L23_6x -_L23_3x: - cmpi.b %d1,&INF # is operand an INF? - bne.b _L23_4x # no - bsr.l sscale_sinf # yes - bra.b _L23_6x -_L23_4x: - cmpi.b %d1,&QNAN # is operand a QNAN? - bne.b _L23_5x # no - bsr.l sop_sqnan # yes - bra.b _L23_6x -_L23_5x: - bsr.l sscale_sdnrm # operand is a DENORM -_L23_6x: - -# -# Result is now in FP0 -# - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs - fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 - unlk %a6 - rts - - -######################################################################### -# ssin(): computes the sine of a normalized input # -# ssind(): computes the sine of a denormalized input # -# scos(): computes the cosine of a normalized input # -# scosd(): computes the cosine of a denormalized input # -# ssincos(): computes the sine and cosine of a normalized input # -# ssincosd(): computes the sine and cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = sin(X) or cos(X) # -# # -# For ssincos(X): # -# fp0 = sin(X) # -# fp1 = cos(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 1 ulp in 64 significant bit, i.e. # -# within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# SIN and COS: # -# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. # -# # -# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. # -# # -# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # -# k = N mod 4, so in particular, k = 0,1,2,or 3. # -# Overwrite k by k := k + AdjN. # -# # -# 4. If k is even, go to 6. # -# # -# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. # -# Return sgn*cos(r) where cos(r) is approximated by an # -# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), # -# s = r*r. 
# -# Exit. # -# # -# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) # -# where sin(r) is approximated by an odd polynomial in r # -# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. # -# Exit. # -# # -# 7. If |X| > 1, go to 9. # -# # -# 8. (|X|<2**(-40)) If SIN is invoked, return X; # -# otherwise return 1. # -# # -# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # -# go back to 3. # -# # -# SINCOS: # -# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # -# # -# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # -# k = N mod 4, so in particular, k = 0,1,2,or 3. # -# # -# 3. If k is even, go to 5. # -# # -# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. # -# j1 exclusive or with the l.s.b. of k. # -# sgn1 := (-1)**j1, sgn2 := (-1)**j2. # -# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where # -# sin(r) and cos(r) are computed as odd and even # -# polynomials in r, respectively. Exit # -# # -# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. # -# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where # -# sin(r) and cos(r) are computed as odd and even # -# polynomials in r, respectively. Exit # -# # -# 6. If |X| > 1, go to 8. # -# # -# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. # -# # -# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # -# go back to 2. # -# # -######################################################################### - -SINA7: long 0xBD6AAA77,0xCCC994F5 -SINA6: long 0x3DE61209,0x7AAE8DA1 -SINA5: long 0xBE5AE645,0x2A118AE4 -SINA4: long 0x3EC71DE3,0xA5341531 -SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 -SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 -SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 - -COSB8: long 0x3D2AC4D0,0xD6011EE3 -COSB7: long 0xBDA9396F,0x9F45AC19 -COSB6: long 0x3E21EED9,0x0612C972 -COSB5: long 0xBE927E4F,0xB79D9FCF -COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 -COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 -COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E -COSB1: long 0xBF000000 - - set INARG,FP_SCR0 - - set X,FP_SCR0 -# set XDCARE,X+2 - set XFRAC,X+4 - - set RPRIME,FP_SCR0 - set SPRIME,FP_SCR1 - - set POSNEG1,L_SCR1 - set TWOTO63,L_SCR1 - - set ENDFLAG,L_SCR2 - set INT,L_SCR2 - - set ADJN,L_SCR3 - -############################################ - global ssin -ssin: - mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0 - bra.b SINBGN - -############################################ - global scos -scos: - mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1 - -############################################ -SINBGN: -#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE - - fmov.x (%a0),%fp0 # LOAD INPUT - fmov.x %fp0,X(%a6) # save input at X - -# "COMPACTIFY" X - mov.l (%a0),%d1 # put exp in hi word - mov.w 4(%a0),%d1 # fetch hi(man) - and.l &0x7FFFFFFF,%d1 # strip sign - - cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)? - bge.b SOK1 # no - bra.w SINSM # yes; input is very small - -SOK1: - cmp.l %d1,&0x4004BC7E # is |X| < 15 PI? - blt.b SINMAIN # no - bra.w SREDUCEX # yes; input is very large - -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 
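-#
-#--In C terms, the reduce-and-dispatch scheme of the ALGORITHM block above
-#--looks roughly like the sketch below (illustrative only: poly_sin and
-#--poly_cos stand in for the SINPOLY/COSPOLY expansions, and the two-piece
-#--Y1/Y2 subtraction performed below is collapsed into a single step):
-#
-#	#include <math.h>
-#	extern double poly_sin(double r), poly_cos(double r);
-#	double fpsp_sin(double x, int adjn)	/* adjn: 0 = sin, 1 = cos */
-#	{
-#		int n = (int)nearbyint(x * (2.0 / M_PI));
-#		double r = x - n * (M_PI / 2.0);	/* |r| <= pi/4 */
-#		int k = (n + adjn) & 3;			/* k = (N+AdjN) mod 4 */
-#		double v = (k & 1) ? poly_cos(r) : poly_sin(r);
-#		return (k & 2) ? -v : v;		/* sgn = (-1)**j */
-#	}
-#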
-SINMAIN: - fmov.x %fp0,%fp1 - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER - - mov.l INT(%a6),%d1 # make a copy of N - asl.l &4,%d1 # N *= 16 - add.l %d1,%a1 # tbl_addr = a1 + (N*16) - -# A1 IS THE ADDRESS OF N*PIBY2 -# ...WHICH IS IN TWO PIECES Y1 & Y2 - fsub.x (%a1)+,%fp0 # X-Y1 - fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 - -SINCONT: -#--continuation from REDUCEX - -#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED - mov.l INT(%a6),%d1 - add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN - ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE - cmp.l %d1,&0 - blt.w COSPOLY - -#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY -#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE -#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS -#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) -#--WHERE T=S*S. -#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION -#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. -SINPOLY: - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.x %fp0,X(%a6) # X IS R - fmul.x %fp0,%fp0 # FP0 IS S - - fmov.d SINA7(%pc),%fp3 - fmov.d SINA6(%pc),%fp2 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS T - - ror.l &1,%d1 - and.l &0x80000000,%d1 -# ...LEAST SIG. BIT OF D0 IN SIGN POSITION - eor.l %d1,X(%a6) # X IS NOW R'= SGN*R - - fmul.x %fp1,%fp3 # TA7 - fmul.x %fp1,%fp2 # TA6 - - fadd.d SINA5(%pc),%fp3 # A5+TA7 - fadd.d SINA4(%pc),%fp2 # A4+TA6 - - fmul.x %fp1,%fp3 # T(A5+TA7) - fmul.x %fp1,%fp2 # T(A4+TA6) - - fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) - fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) - - fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) - - fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) - fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) - fmul.x X(%a6),%fp0 # R'*S - - fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] - - fmul.x %fp1,%fp0 # SIN(R')-R' - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.x X(%a6),%fp0 # last inst - possible exception set - bra t_inx2 - -#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. -#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY -#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE -#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS -#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) -#--WHERE T=S*S. -#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION -#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 -#--AND IS THEREFORE STORED AS SINGLE PRECISION. -COSPOLY: - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.x %fp0,%fp0 # FP0 IS S - - fmov.d COSB8(%pc),%fp2 - fmov.d COSB7(%pc),%fp3 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS T - - fmov.x %fp0,X(%a6) # X IS S - ror.l &1,%d1 - and.l &0x80000000,%d1 -# ...LEAST SIG. 
BIT OF D0 IN SIGN POSITION - - fmul.x %fp1,%fp2 # TB8 - - eor.l %d1,X(%a6) # X IS NOW S'= SGN*S - and.l &0x80000000,%d1 - - fmul.x %fp1,%fp3 # TB7 - - or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE - mov.l %d1,POSNEG1(%a6) - - fadd.d COSB6(%pc),%fp2 # B6+TB8 - fadd.d COSB5(%pc),%fp3 # B5+TB7 - - fmul.x %fp1,%fp2 # T(B6+TB8) - fmul.x %fp1,%fp3 # T(B5+TB7) - - fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) - fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) - - fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) - fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) - - fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) - fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) - - fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) - - fadd.x %fp1,%fp0 - - fmul.x X(%a6),%fp0 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set - bra t_inx2 - -############################################## - -# SINe: Big OR Small? -#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -#--IF |X| < 2**(-40), RETURN X OR 1. -SINBORS: - cmp.l %d1,&0x3FFF8000 - bgt.l SREDUCEX - -SINSM: - mov.l ADJN(%a6),%d1 - cmp.l %d1,&0 - bgt.b COSTINY - -# here, the operation may underflow iff the precision is sgl or dbl. -# extended denorms are handled through another entry point. -SINTINY: -# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE - - fmov.l %d0,%fpcr # restore users round mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - bra t_catch - -COSTINY: - fmov.s &0x3F800000,%fp0 # fp0 = 1.0 - fmov.l %d0,%fpcr # restore users round mode,prec - fadd.s &0x80800000,%fp0 # last inst - possible exception set - bra t_pinx2 - -################################################ - global ssind -#--SIN(X) = X FOR DENORMALIZED X -ssind: - bra t_extdnrm - -############################################ - global scosd -#--COS(X) = 1 FOR DENORMALIZED X -scosd: - fmov.s &0x3F800000,%fp0 # fp0 = 1.0 - bra t_pinx2 - -################################################## - - global ssincos -ssincos: -#--SET ADJN TO 4 - mov.l &4,ADJN(%a6) - - fmov.x (%a0),%fp0 # LOAD INPUT - fmov.x %fp0,X(%a6) - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 # COMPACTIFY X - - cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? - bge.b SCOK1 - bra.w SCSM - -SCOK1: - cmp.l %d1,&0x4004BC7E # |X| < 15 PI? - blt.b SCMAIN - bra.w SREDUCEX - - -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. -SCMAIN: - fmov.x %fp0,%fp1 - - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER - - mov.l INT(%a6),%d1 - asl.l &4,%d1 - add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2 - - fsub.x (%a1)+,%fp0 # X-Y1 - fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 - -SCCONT: -#--continuation point from REDUCEX - - mov.l INT(%a6),%d1 - ror.l &1,%d1 - cmp.l %d1,&0 # D0 < 0 IFF N IS ODD - bge.w NEVEN - -SNODD: -#--REGISTERS SAVED SO FAR: D0, A0, FP2. 
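-#--For odd N, step 4 of the SINCOS algorithm above applies: SIN(X) comes
-#--from the cosine polynomial and COS(X) from the sine polynomial, and the
-#--two Horner chains below are evaluated interleaved so that consecutive
-#--FP operations are independent.  A rough C sketch (illustrative only;
-#--sgn1/sgn2 carry the (-1)**j1/(-1)**j2 signs, poly_* are stand-ins):
-#
-#	sin_x = sgn1 * poly_cos(r);	/* fp0: COSB1..COSB8 chain */
-#	cos_x = sgn2 * poly_sin(r);	/* fp1: SINA1..SINA7 chain */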
- fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,RPRIME(%a6) - fmul.x %fp0,%fp0 # FP0 IS S = R*R - fmov.d SINA7(%pc),%fp1 # A7 - fmov.d COSB8(%pc),%fp2 # B8 - fmul.x %fp0,%fp1 # SA7 - fmul.x %fp0,%fp2 # SB8 - - mov.l %d2,-(%sp) - mov.l %d1,%d2 - ror.l &1,%d2 - and.l &0x80000000,%d2 - eor.l %d1,%d2 - and.l &0x80000000,%d2 - - fadd.d SINA6(%pc),%fp1 # A6+SA7 - fadd.d COSB7(%pc),%fp2 # B7+SB8 - - fmul.x %fp0,%fp1 # S(A6+SA7) - eor.l %d2,RPRIME(%a6) - mov.l (%sp)+,%d2 - fmul.x %fp0,%fp2 # S(B7+SB8) - ror.l &1,%d1 - and.l &0x80000000,%d1 - mov.l &0x3F800000,POSNEG1(%a6) - eor.l %d1,POSNEG1(%a6) - - fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7) - fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8) - - fmul.x %fp0,%fp1 # S(A5+S(A6+SA7)) - fmul.x %fp0,%fp2 # S(B6+S(B7+SB8)) - fmov.x %fp0,SPRIME(%a6) - - fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7)) - eor.l %d1,SPRIME(%a6) - fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8)) - - fmul.x %fp0,%fp1 # S(A4+...) - fmul.x %fp0,%fp2 # S(B5+...) - - fadd.d SINA3(%pc),%fp1 # A3+S(A4+...) - fadd.d COSB4(%pc),%fp2 # B4+S(B5+...) - - fmul.x %fp0,%fp1 # S(A3+...) - fmul.x %fp0,%fp2 # S(B4+...) - - fadd.x SINA2(%pc),%fp1 # A2+S(A3+...) - fadd.x COSB3(%pc),%fp2 # B3+S(B4+...) - - fmul.x %fp0,%fp1 # S(A2+...) - fmul.x %fp0,%fp2 # S(B3+...) - - fadd.x SINA1(%pc),%fp1 # A1+S(A2+...) - fadd.x COSB2(%pc),%fp2 # B2+S(B3+...) - - fmul.x %fp0,%fp1 # S(A1+...) - fmul.x %fp2,%fp0 # S(B2+...) - - fmul.x RPRIME(%a6),%fp1 # R'S(A1+...) - fadd.s COSB1(%pc),%fp0 # B1+S(B2...) - fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...)) - - fmovm.x (%sp)+,&0x20 # restore fp2 - - fmov.l %d0,%fpcr - fadd.x RPRIME(%a6),%fp1 # COS(X) - bsr sto_cos # store cosine result - fadd.s POSNEG1(%a6),%fp0 # SIN(X) - bra t_inx2 - -NEVEN: -#--REGISTERS SAVED SO FAR: FP2. - fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,RPRIME(%a6) - fmul.x %fp0,%fp0 # FP0 IS S = R*R - - fmov.d COSB8(%pc),%fp1 # B8 - fmov.d SINA7(%pc),%fp2 # A7 - - fmul.x %fp0,%fp1 # SB8 - fmov.x %fp0,SPRIME(%a6) - fmul.x %fp0,%fp2 # SA7 - - ror.l &1,%d1 - and.l &0x80000000,%d1 - - fadd.d COSB7(%pc),%fp1 # B7+SB8 - fadd.d SINA6(%pc),%fp2 # A6+SA7 - - eor.l %d1,RPRIME(%a6) - eor.l %d1,SPRIME(%a6) - - fmul.x %fp0,%fp1 # S(B7+SB8) - - or.l &0x3F800000,%d1 - mov.l %d1,POSNEG1(%a6) - - fmul.x %fp0,%fp2 # S(A6+SA7) - - fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8) - fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7) - - fmul.x %fp0,%fp1 # S(B6+S(B7+SB8)) - fmul.x %fp0,%fp2 # S(A5+S(A6+SA7)) - - fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8)) - fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7)) - - fmul.x %fp0,%fp1 # S(B5+...) - fmul.x %fp0,%fp2 # S(A4+...) - - fadd.d COSB4(%pc),%fp1 # B4+S(B5+...) - fadd.d SINA3(%pc),%fp2 # A3+S(A4+...) - - fmul.x %fp0,%fp1 # S(B4+...) - fmul.x %fp0,%fp2 # S(A3+...) - - fadd.x COSB3(%pc),%fp1 # B3+S(B4+...) - fadd.x SINA2(%pc),%fp2 # A2+S(A3+...) - - fmul.x %fp0,%fp1 # S(B3+...) - fmul.x %fp0,%fp2 # S(A2+...) - - fadd.x COSB2(%pc),%fp1 # B2+S(B3+...) - fadd.x SINA1(%pc),%fp2 # A1+S(A2+...) - - fmul.x %fp0,%fp1 # S(B2+...) - fmul.x %fp2,%fp0 # s(a1+...) - - - fadd.s COSB1(%pc),%fp1 # B1+S(B2...) - fmul.x RPRIME(%a6),%fp0 # R'S(A1+...) 
- fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
-
- fmovm.x (%sp)+,&0x20 # restore fp2
-
- fmov.l %d0,%fpcr
- fadd.s POSNEG1(%a6),%fp1 # COS(X)
- bsr sto_cos # store cosine result
- fadd.x RPRIME(%a6),%fp0 # SIN(X)
- bra t_inx2
-
-################################################
-
-SCBORS:
- cmp.l %d1,&0x3FFF8000
- bgt.w SREDUCEX
-
-################################################
-
-SCSM:
-# mov.w &0x0000,XDCARE(%a6)
- fmov.s &0x3F800000,%fp1
-
- fmov.l %d0,%fpcr
- fsub.s &0x00800000,%fp1
- bsr sto_cos # store cosine result
- fmov.l %fpcr,%d0 # d0 must have fpcr,too
- mov.b &FMOV_OP,%d1 # last inst is MOVE
- fmov.x X(%a6),%fp0
- bra t_catch
-
-##############################################
-
- global ssincosd
-#--SIN AND COS OF X FOR DENORMALIZED X
-ssincosd:
- mov.l %d0,-(%sp) # save d0
- fmov.s &0x3F800000,%fp1
- bsr sto_cos # store cosine result
- mov.l (%sp)+,%d0 # restore d0
- bra t_extdnrm
-
-############################################
-
-#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
-#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
-#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
-SREDUCEX:
- fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
- mov.l %d2,-(%sp) # save d2
- fmov.s &0x00000000,%fp1 # fp1 = 0
-
-#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
-#--there is a danger of unwanted overflow in first LOOP iteration. In this
-#--case, reduce argument by one remainder step to make subsequent reduction
-#--safe.
- cmp.l %d1,&0x7ffeffff # is arg dangerously large?
- bne.b SLOOP # no
-
-# yes; create 2**16383*PI/2
- mov.w &0x7ffe,FP_SCR0_EX(%a6)
- mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6)
-
-# create low half of 2**16383*PI/2 at FP_SCR1
- mov.w &0x7fdc,FP_SCR1_EX(%a6)
- mov.l &0x85a308d3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6)
-
- ftest.x %fp0 # test sign of argument
- fblt.w sred_neg
-
- or.b &0x80,FP_SCR0_EX(%a6) # positive arg
- or.b &0x80,FP_SCR1_EX(%a6)
-sred_neg:
- fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
- fmov.x %fp0,%fp1 # save high result in fp1
- fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
- fsub.x %fp0,%fp1 # determine low component of result
- fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
-
-#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
-#--integer quotient will be stored in N
-#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
-SLOOP:
- fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
- mov.w INARG(%a6),%d1
- mov.l %d1,%a1 # save a copy of D0
- and.l &0x00007FFF,%d1
- sub.l &0x00003FFF,%d1 # d0 = K
- cmp.l %d1,&28
- ble.b SLASTLOOP
-SCONTLOOP:
- sub.l &27,%d1 # d0 = L := K-27
- mov.b &0,ENDFLAG(%a6)
- bra.b SWORK
-SLASTLOOP:
- clr.l %d1 # d0 = L := 0
- mov.b &1,ENDFLAG(%a6)
-
-SWORK:
-#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
-#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
-
-#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
-#--2**L * (PIby2_1), 2**L * (PIby2_2)
-
- mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
- sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
-
- mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
- mov.l &0x4E44152A,FP_SCR0_LO(%a6)
- mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
-
- fmov.x %fp0,%fp2
- fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
-
-#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
-#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
-#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
-#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
-#--US THE DESIRED VALUE IN FLOATING POINT.
- mov.l %a1,%d2
- swap %d2
- and.l &0x80000000,%d2
- or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
- mov.l %d2,TWOTO63(%a6)
- fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
- fsub.s TWOTO63(%a6),%fp2 # fp2 = N
-# fint.x %fp2
-
-#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
- mov.l %d1,%d2 # d2 = L
-
- add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
- mov.w %d2,FP_SCR0_EX(%a6)
- mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
-
- add.l &0x00003FDD,%d1
- mov.w %d1,FP_SCR1_EX(%a6)
- mov.l &0x85A308D3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
-
- mov.b ENDFLAG(%a6),%d1
-
-#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
-#--P2 = 2**(L) * Piby2_2
- fmov.x %fp2,%fp4 # fp4 = N
- fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
- fmov.x %fp2,%fp5 # fp5 = N
- fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
- fmov.x %fp4,%fp3 # fp3 = W = N*P1
-
-#--we want P+p = W+w but |p| <= half ulp of P
-#--Then, we need to compute A := R-P and a := r-p
- fadd.x %fp5,%fp3 # fp3 = P
- fsub.x %fp3,%fp4 # fp4 = W-P
-
- fsub.x %fp3,%fp0 # fp0 = A := R - P
- fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
-
- fmov.x %fp0,%fp3 # fp3 = A
- fsub.x %fp4,%fp1 # fp1 = a := r - p
-
-#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
-#--|r| <= half ulp of R.
- fadd.x %fp1,%fp0 # fp0 = R := A+a
-#--No need to calculate r if this is the last loop
- cmp.b %d1,&0
- bgt.w SRESTORE
-
-#--Need to calculate r
- fsub.x %fp0,%fp3 # fp3 = A-R
- fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
- bra.w SLOOP
-
-SRESTORE:
- fmov.l %fp2,INT(%a6)
- mov.l (%sp)+,%d2 # restore d2
- fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
-
- mov.l ADJN(%a6),%d1
- cmp.l %d1,&4
-
- blt.w SINCONT
- bra.w SCCONT
-
-#########################################################################
-# stan(): computes the tangent of a normalized input #
-# stand(): computes the tangent of a denormalized input #
-# #
-# INPUT *************************************************************** #
-# a0 = pointer to extended precision input #
-# d0 = round precision,mode #
-# #
-# OUTPUT ************************************************************** #
-# fp0 = tan(X) #
-# #
-# ACCURACY and MONOTONICITY ******************************************* #
-# The returned result is within 3 ulp in 64 significant bit, i.e. #
-# within 0.5001 ulp to 53 bits if the result is subsequently #
-# rounded to double precision. The result is provably monotonic #
-# in double precision. #
-# #
-# ALGORITHM *********************************************************** #
-# #
-# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
-# #
-# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
-# k = N mod 2, so in particular, k = 0 or 1. #
-# #
-# 3. If k is odd, go to 5. #
-# #
-# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
-# rational function U/V where #
-# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
-# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
-# Exit. #
-# #
-# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
-# a rational function U/V where #
-# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
-# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
-# -Cot(r) = -V/U. Exit. #
-# #
-# 6. If |X| > 1, go to 8. #
-# #
-# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
-# #
-# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
-# to 2.
# -# # -######################################################################### - -TANQ4: - long 0x3EA0B759,0xF50F8688 -TANP3: - long 0xBEF2BAA5,0xA8924F04 - -TANQ3: - long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 - -TANP2: - long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 - -TANQ2: - long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 - -TANP1: - long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 - -TANQ1: - long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 - -INVTWOPI: - long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 - -TWOPI1: - long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 -TWOPI2: - long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 - -#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING -#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT -#--MOST 69 BITS LONG. -# global PITBL -PITBL: - long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 - long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 - long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 - long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 - long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 - long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 - long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 - long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 - long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 - long 0xC0040000,0x90836524,0x88034B96,0x20B00000 - long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 - long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 - long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 - long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 - long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 - long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 - long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 - long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 - long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 - long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 - long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 - long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 - long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 - long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 - long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 - long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 - long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 - long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 - long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 - long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 - long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 - long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 - long 0x00000000,0x00000000,0x00000000,0x00000000 - long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 - long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 - long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 - long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 - long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 - long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 - long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 - long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 - long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 - long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 - long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 - long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 - long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 - long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 - long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 - long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 - long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 - long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 - long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 - long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 - long 
0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 - long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 - long 0x40040000,0x90836524,0x88034B96,0xA0B00000 - long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 - long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 - long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 - long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 - long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 - long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 - long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 - long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 - long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 - - set INARG,FP_SCR0 - - set TWOTO63,L_SCR1 - set INT,L_SCR1 - set ENDFLAG,L_SCR2 - - global stan -stan: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? - bge.b TANOK1 - bra.w TANSM -TANOK1: - cmp.l %d1,&0x4004BC7E # |X| < 15 PI? - blt.b TANMAIN - bra.w REDUCEX - -TANMAIN: -#--THIS IS THE USUAL CASE, |X| <= 15 PI. -#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. - fmov.x %fp0,%fp1 - fmul.d TWOBYPI(%pc),%fp1 # X*2/PI - - lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32 - - fmov.l %fp1,%d1 # CONVERT TO INTEGER - - asl.l &4,%d1 - add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2 - - fsub.x (%a1)+,%fp0 # X-Y1 - - fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 - - ror.l &5,%d1 - and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0 - -TANCONT: - fmovm.x &0x0c,-(%sp) # save fp2,fp3 - - cmp.l %d1,&0 - blt.w NODD - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # S = R*R - - fmov.d TANQ4(%pc),%fp3 - fmov.d TANP3(%pc),%fp2 - - fmul.x %fp1,%fp3 # SQ4 - fmul.x %fp1,%fp2 # SP3 - - fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 - fadd.x TANP2(%pc),%fp2 # P2+SP3 - - fmul.x %fp1,%fp3 # S(Q3+SQ4) - fmul.x %fp1,%fp2 # S(P2+SP3) - - fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) - fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) - - fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) - fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) - - fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) - fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) - - fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) - - fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) - - fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) - - fmovm.x (%sp)+,&0x30 # restore fp2,fp3 - - fmov.l %d0,%fpcr # restore users round mode,prec - fdiv.x %fp1,%fp0 # last inst - possible exception set - bra t_inx2 - -NODD: - fmov.x %fp0,%fp1 - fmul.x %fp0,%fp0 # S = R*R - - fmov.d TANQ4(%pc),%fp3 - fmov.d TANP3(%pc),%fp2 - - fmul.x %fp0,%fp3 # SQ4 - fmul.x %fp0,%fp2 # SP3 - - fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 - fadd.x TANP2(%pc),%fp2 # P2+SP3 - - fmul.x %fp0,%fp3 # S(Q3+SQ4) - fmul.x %fp0,%fp2 # S(P2+SP3) - - fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) - fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) - - fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) - fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) - - fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) - fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) - - fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) - - fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) - fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) - - fmovm.x (%sp)+,&0x30 # restore fp2,fp3 - - fmov.x %fp1,-(%sp) - eor.l &0x80000000,(%sp) - - fmov.l %d0,%fpcr # restore users round mode,prec - fdiv.x (%sp)+,%fp0 # last inst - possible exception set - bra t_inx2 - -TANBORS: -#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. -#--IF |X| < 2**(-40), RETURN X OR 1. 
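-#
-#--Taken together, the dispatch in TANCONT/NODD above and the small/large
-#--paths here amount to this rough C sketch (illustrative only: poly_u and
-#--poly_v stand in for the TANP/TANQ expansions, and the reduction of x to
-#--n and r is either the table lookup above or the REDUCEX loop below):
-#
-#	if (fabs(x) < 0x1p-40) return x;	/* TANSM: tan(X) ~= X */
-#	/* reduce: x = n*(pi/2) + r, |r| <= pi/4 */
-#	double u = poly_u(r), v = poly_v(r);	/* tan(r) ~= u/v */
-#	return (n & 1) ? -v / u : u / v;	/* odd n: tan(X) = -cot(r) */
-#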
- cmp.l %d1,&0x3FFF8000
- bgt.b REDUCEX
-
-TANSM:
- fmov.x %fp0,-(%sp)
- fmov.l %d0,%fpcr # restore users round mode,prec
- mov.b &FMOV_OP,%d1 # last inst is MOVE
- fmov.x (%sp)+,%fp0 # last inst - possible exception set
- bra t_catch
-
- global stand
-#--TAN(X) = X FOR DENORMALIZED X
-stand:
- bra t_extdnrm
-
-#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
-#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
-#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
-REDUCEX:
- fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
- mov.l %d2,-(%sp) # save d2
- fmov.s &0x00000000,%fp1 # fp1 = 0
-
-#--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
-#--there is a danger of unwanted overflow in first LOOP iteration. In this
-#--case, reduce argument by one remainder step to make subsequent reduction
-#--safe.
- cmp.l %d1,&0x7ffeffff # is arg dangerously large?
- bne.b LOOP # no
-
-# yes; create 2**16383*PI/2
- mov.w &0x7ffe,FP_SCR0_EX(%a6)
- mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
- clr.l FP_SCR0_LO(%a6)
-
-# create low half of 2**16383*PI/2 at FP_SCR1
- mov.w &0x7fdc,FP_SCR1_EX(%a6)
- mov.l &0x85a308d3,FP_SCR1_HI(%a6)
- clr.l FP_SCR1_LO(%a6)
-
- ftest.x %fp0 # test sign of argument
- fblt.w red_neg
-
- or.b &0x80,FP_SCR0_EX(%a6) # positive arg
- or.b &0x80,FP_SCR1_EX(%a6)
-red_neg:
- fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
- fmov.x %fp0,%fp1 # save high result in fp1
- fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
- fsub.x %fp0,%fp1 # determine low component of result
- fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
-
-#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
-#--integer quotient will be stored in N
-#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
-LOOP:
- fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
- mov.w INARG(%a6),%d1
- mov.l %d1,%a1 # save a copy of D0
- and.l &0x00007FFF,%d1
- sub.l &0x00003FFF,%d1 # d0 = K
- cmp.l %d1,&28
- ble.b LASTLOOP
-CONTLOOP:
- sub.l &27,%d1 # d0 = L := K-27
- mov.b &0,ENDFLAG(%a6)
- bra.b WORK
-LASTLOOP:
- clr.l %d1 # d0 = L := 0
- mov.b &1,ENDFLAG(%a6)
-
-WORK:
-#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
-#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
-
-#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
-#--2**L * (PIby2_1), 2**L * (PIby2_2)
-
- mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
- sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
-
- mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
- mov.l &0x4E44152A,FP_SCR0_LO(%a6)
- mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
-
- fmov.x %fp0,%fp2
- fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
-
-#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
-#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
-#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
-#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
-#--US THE DESIRED VALUE IN FLOATING POINT.
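-#
-#--In C terms the rounding trick is roughly the following (illustrative
-#--sketch; 2**63 has unit weight 1.0 in the last place of the 64-bit
-#--extended mantissa, so the add forces the fraction of the sum to be
-#--rounded away; for 53-bit doubles the analogous constant is 2**52):
-#
-#	#include <math.h>
-#	double big = copysign(0x1p63, x);	/* SIGN(INARG)*2**63 */
-#	double n = (y + big) - big;	/* y = X*2**(-L)*(2/PI); n = round(y) */
-#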
- mov.l %a1,%d2 - swap %d2 - and.l &0x80000000,%d2 - or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL - mov.l %d2,TWOTO63(%a6) - fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED - fsub.s TWOTO63(%a6),%fp2 # fp2 = N -# fintrz.x %fp2,%fp2 - -#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 - mov.l %d1,%d2 # d2 = L - - add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) - mov.w %d2,FP_SCR0_EX(%a6) - mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) - clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 - - add.l &0x00003FDD,%d1 - mov.w %d1,FP_SCR1_EX(%a6) - mov.l &0x85A308D3,FP_SCR1_HI(%a6) - clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 - - mov.b ENDFLAG(%a6),%d1 - -#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and -#--P2 = 2**(L) * Piby2_2 - fmov.x %fp2,%fp4 # fp4 = N - fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 - fmov.x %fp2,%fp5 # fp5 = N - fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 - fmov.x %fp4,%fp3 # fp3 = W = N*P1 - -#--we want P+p = W+w but |p| <= half ulp of P -#--Then, we need to compute A := R-P and a := r-p - fadd.x %fp5,%fp3 # fp3 = P - fsub.x %fp3,%fp4 # fp4 = W-P - - fsub.x %fp3,%fp0 # fp0 = A := R - P - fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w - - fmov.x %fp0,%fp3 # fp3 = A - fsub.x %fp4,%fp1 # fp1 = a := r - p - -#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but -#--|r| <= half ulp of R. - fadd.x %fp1,%fp0 # fp0 = R := A+a -#--No need to calculate r if this is the last loop - cmp.b %d1,&0 - bgt.w RESTORE - -#--Need to calculate r - fsub.x %fp0,%fp3 # fp3 = A-R - fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a - bra.w LOOP - -RESTORE: - fmov.l %fp2,INT(%a6) - mov.l (%sp)+,%d2 # restore d2 - fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} - - mov.l INT(%a6),%d1 - ror.l &1,%d1 - - bra.w TANCONT - -######################################################################### -# satan(): computes the arctangent of a normalized number # -# satand(): computes the arctangent of a denormalized number # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arctan(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. # -# # -# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. # -# Note that k = -4, -3,..., or 3. # -# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 # -# significant bits of X with a bit-1 attached at the 6-th # -# bit position. Define u to be u = (X-F) / (1 + X*F). # -# # -# Step 3. Approximate arctan(u) by a polynomial poly. # -# # -# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # -# table of values calculated beforehand. Exit. # -# # -# Step 5. If |X| >= 16, go to Step 7. # -# # -# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # -# # -# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # -# polynomial in X'. # -# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. 
# -# # -######################################################################### - -ATANA3: long 0xBFF6687E,0x314987D8 -ATANA2: long 0x4002AC69,0x34A26DB3 -ATANA1: long 0xBFC2476F,0x4E1DA28E - -ATANB6: long 0x3FB34444,0x7F876989 -ATANB5: long 0xBFB744EE,0x7FAF45DB -ATANB4: long 0x3FBC71C6,0x46940220 -ATANB3: long 0xBFC24924,0x921872F9 -ATANB2: long 0x3FC99999,0x99998FA9 -ATANB1: long 0xBFD55555,0x55555555 - -ATANC5: long 0xBFB70BF3,0x98539E6A -ATANC4: long 0x3FBC7187,0x962D1D7D -ATANC3: long 0xBFC24924,0x827107B8 -ATANC2: long 0x3FC99999,0x9996263E -ATANC1: long 0xBFD55555,0x55555536 - -PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 -NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 - -PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 -NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 - -ATANTBL: - long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 - long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 - long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 - long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 - long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 - long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 - long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 - long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 - long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 - long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 - long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 - long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 - long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 - long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 - long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 - long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 - long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 - long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 - long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 - long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 - long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 - long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 - long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 - long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 - long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 - long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 - long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 - long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 - long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 - long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 - long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 - long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 - long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 - long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 - long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 - long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 - long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 - long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 - long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 - long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 - long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 - long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 - long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 - long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 - long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 - long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 - long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 - long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 - long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 - long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 - long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 - long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 - long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 - long 
0x3FFE0000,0x97731420,0x365E538C,0x00000000 - long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 - long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 - long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 - long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 - long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 - long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 - long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 - long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 - long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 - long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 - long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 - long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 - long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 - long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 - long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 - long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 - long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 - long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 - long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 - long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 - long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 - long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 - long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 - long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 - long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 - long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 - long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 - long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 - long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 - long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 - long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 - long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 - long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 - long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 - long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 - long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 - long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 - long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 - long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 - long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 - long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 - long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 - long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 - long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 - long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 - long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 - long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 - long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 - long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 - long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 - long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 - long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 - long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 - long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 - long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 - long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 - long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 - long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 - long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 - long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 - long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 - long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 - long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 - long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 - long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 - long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 - long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 - long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 - long 
0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 - long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 - long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 - long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 - long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 - long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - set XFRACLO,X+8 - - set ATANF,FP_SCR1 - set ATANFHI,ATANF+4 - set ATANFLO,ATANF+8 - - global satan -#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -satan: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? - bge.b ATANOK1 - bra.w ATANSM - -ATANOK1: - cmp.l %d1,&0x4002FFFF # |X| < 16 ? - ble.b ATANMAIN - bra.w ATANBIG - -#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE -#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). -#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN -#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE -#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS -#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR -#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO -#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE -#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL -#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE -#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION -#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION -#--WILL INVOLVE A VERY LONG POLYNOMIAL. - -#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS -#--WE CHOSE F TO BE +-2^K * 1.BBBB1 -#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE -#--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE -#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS -#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). - -ATANMAIN: - - and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS - or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 - mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F - - fmov.x %fp0,%fp1 # FP1 IS X - fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 - fsub.x X(%a6),%fp0 # FP0 IS X-F - fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F - fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) - -#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) -#--CREATE ATAN(F) AND STORE IT IN ATANF, AND -#--SAVE REGISTERS FP2. - - mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY - mov.l %d1,%d2 # THE EXP AND 16 BITS OF X - and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION - and.l &0x7FFF0000,%d2 # EXPONENT OF F - sub.l &0x3FFB0000,%d2 # K+4 - asr.l &1,%d2 - add.l %d2,%d1 # THE 7 BITS IDENTIFYING F - asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) - lea ATANTBL(%pc),%a1 - add.l %d1,%a1 # ADDRESS OF ATAN(|F|) - mov.l (%a1)+,ATANF(%a6) - mov.l (%a1)+,ATANFHI(%a6) - mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) - mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN - and.l &0x80000000,%d1 # SIGN(F) - or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) - mov.l (%sp)+,%d2 # RESTORE d2 - -#--THAT'S ALL I HAVE TO DO FOR NOW, -#--BUT ALAS, THE DIVIDE IS STILL CRANKING! - -#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS -#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U -#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. -#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) -#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 
-#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT -#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED - - fmovm.x &0x04,-(%sp) # save fp2 - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 - fmov.d ATANA3(%pc),%fp2 - fadd.x %fp1,%fp2 # A3+V - fmul.x %fp1,%fp2 # V*(A3+V) - fmul.x %fp0,%fp1 # U*V - fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) - fmul.d ATANA1(%pc),%fp1 # A1*U*V - fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) - fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED - - fmovm.x (%sp)+,&0x20 # restore fp2 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - fadd.x ATANF(%a6),%fp0 # ATAN(X) - bra t_inx2 - -ATANBORS: -#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. -#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. - cmp.l %d1,&0x3FFF8000 - bgt.w ATANBIG # I.E. |X| >= 16 - -ATANSM: -#--|X| <= 1/16 -#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE -#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) -#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) -#--WHERE Y = X*X, AND Z = Y*Y. - - cmp.l %d1,&0x3FD78000 - blt.w ATANTINY - -#--COMPUTE POLYNOMIAL - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.x %fp0,%fp0 # FPO IS Y = X*X - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y - - fmov.d ATANB6(%pc),%fp2 - fmov.d ATANB5(%pc),%fp3 - - fmul.x %fp1,%fp2 # Z*B6 - fmul.x %fp1,%fp3 # Z*B5 - - fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 - fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 - - fmul.x %fp1,%fp2 # Z*(B4+Z*B6) - fmul.x %fp3,%fp1 # Z*(B3+Z*B5) - - fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) - fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) - - fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) - fmul.x X(%a6),%fp0 # X*Y - - fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] - - fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - fadd.x X(%a6),%fp0 - bra t_inx2 - -ATANTINY: -#--|X| < 2^(-40), ATAN(X) = X - - fmov.l %d0,%fpcr # restore users rnd mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - - bra t_catch - -ATANBIG: -#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, -#--RETURN SIGN(X)*PI/2 + ATAN(-1/X). - cmp.l %d1,&0x40638000 - bgt.w ATANHUGE - -#--APPROXIMATE ATAN(-1/X) BY -#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' -#--THIS CAN BE RE-WRITTEN AS -#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. - - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.s &0xBF800000,%fp1 # LOAD -1 - fdiv.x %fp0,%fp1 # FP1 IS -1/X - -#--DIVIDE IS STILL CRANKING - - fmov.x %fp1,%fp0 # FP0 IS X' - fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' - fmov.x %fp1,X(%a6) # X IS REALLY X' - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y - - fmov.d ATANC5(%pc),%fp3 - fmov.d ATANC4(%pc),%fp2 - - fmul.x %fp1,%fp3 # Z*C5 - fmul.x %fp1,%fp2 # Z*B4 - - fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 - fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 - - fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED - fmul.x %fp0,%fp2 # Y*(C2+Z*C4) - - fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) - fmul.x X(%a6),%fp0 # X'*Y - - fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] - - fmul.x %fp1,%fp0 # X'*Y*([B1+Z*(B3+Z*B5)] -# ... 
+[Y*(B2+Z*(B4+Z*B6))]) - fadd.x X(%a6),%fp0 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - - fmov.l %d0,%fpcr # restore users rnd mode,prec - tst.b (%a0) - bpl.b pos_big - -neg_big: - fadd.x NPIBY2(%pc),%fp0 - bra t_minx2 - -pos_big: - fadd.x PPIBY2(%pc),%fp0 - bra t_pinx2 - -ATANHUGE: -#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY - tst.b (%a0) - bpl.b pos_huge - -neg_huge: - fmov.x NPIBY2(%pc),%fp0 - fmov.l %d0,%fpcr - fadd.x PTINY(%pc),%fp0 - bra t_minx2 - -pos_huge: - fmov.x PPIBY2(%pc),%fp0 - fmov.l %d0,%fpcr - fadd.x NTINY(%pc),%fp0 - bra t_pinx2 - - global satand -#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT -satand: - bra t_extdnrm - -######################################################################### -# sasin(): computes the inverse sine of a normalized input # -# sasind(): computes the inverse sine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arcsin(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ASIN # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate asin(X) by # -# z := sqrt( [1-X][1+X] ) # -# asin(X) = atan( x / z ). # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.# -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global sasin -sasin: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ASINBIG - -# This catch is added here for the '060 QSP. Originally, the call to -# satan() would handle this case by causing the exception which would -# not be caught until gen_except(). Now, with the exceptions being -# detected inside of satan(), the exception would have been handled there -# instead of inside sasin() as expected. - cmp.l %d1,&0x3FD78000 - blt.w ASINTINY - -#--THIS IS THE USUAL CASE, |X| < 1 -#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) - -ASINMAIN: - fmov.s &0x3F800000,%fp1 - fsub.x %fp0,%fp1 # 1-X - fmovm.x &0x4,-(%sp) # {fp2} - fmov.s &0x3F800000,%fp2 - fadd.x %fp0,%fp2 # 1+X - fmul.x %fp2,%fp1 # (1+X)(1-X) - fmovm.x (%sp)+,&0x20 # {fp2} - fsqrt.x %fp1 # SQRT([1-X][1+X]) - fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X]) - fmovm.x &0x01,-(%sp) # save X/SQRT(...) - lea (%sp),%a0 # pass ptr to X/SQRT(...) - bsr satan - add.l &0xc,%sp # clear X/SQRT(...) from stack - bra t_inx2 - -ASINBIG: - fabs.x %fp0 # |X| - fcmp.s %fp0,&0x3F800000 - fbgt t_operr # cause an operr exception - -#--|X| = 1, ASIN(X) = +- PI/2. 
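The ASINMAIN path above is the entire algorithm for |X| < 1; the ASINONE path that follows handles |X| = 1 by returning sign(X)*Pi/2. A hedged C rendering of the |X| < 1 identity (double precision, libm atan() in place of satan(), hypothetical helper name):

	#include <math.h>

	/* asin(x) = atan( x / sqrt((1-x)(1+x)) ), valid for |x| < 1.
	 * The product (1-x)*(1+x) is used instead of 1 - x*x because it
	 * loses less accuracy when |x| is near 1. */
	static double asin_via_atan(double x)
	{
		double z = sqrt((1.0 - x) * (1.0 + x));
		return atan(x / z);
	}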
-ASINONE: - fmov.x PIBY2(%pc),%fp0 - mov.l (%a0),%d1 - and.l &0x80000000,%d1 # SIGN BIT OF X - or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT - mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT - fmov.l %d0,%fpcr - fmul.s (%sp)+,%fp0 - bra t_inx2 - -#--|X| < 2^(-40), ATAN(X) = X -ASINTINY: - fmov.l %d0,%fpcr # restore users rnd mode,prec - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x (%a0),%fp0 # last inst - possible exception - bra t_catch - - global sasind -#--ASIN(X) = X FOR DENORMALIZED X -sasind: - bra t_extdnrm - -######################################################################### -# sacos(): computes the inverse cosine of a normalized input # -# sacosd(): computes the inverse cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arccos(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ACOS # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate acos(X) by # -# z := (1-X) / (1+X) # -# acos(X) = 2 * atan( sqrt(z) ). # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. # -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global sacos -sacos: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 # pack exp w/ upper 16 fraction - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ACOSBIG - -#--THIS IS THE USUAL CASE, |X| < 1 -#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) - -ACOSMAIN: - fmov.s &0x3F800000,%fp1 - fadd.x %fp0,%fp1 # 1+X - fneg.x %fp0 # -X - fadd.s &0x3F800000,%fp0 # 1-X - fdiv.x %fp1,%fp0 # (1-X)/(1+X) - fsqrt.x %fp0 # SQRT((1-X)/(1+X)) - mov.l %d0,-(%sp) # save original users fpcr - clr.l %d0 - fmovm.x &0x01,-(%sp) # save SQRT(...) to stack - lea (%sp),%a0 # pass ptr to sqrt - bsr satan # ATAN(SQRT([1-X]/[1+X])) - add.l &0xc,%sp # clear SQRT(...) from stack - - fmov.l (%sp)+,%fpcr # restore users round prec,mode - fadd.x %fp0,%fp0 # 2 * ATAN( STUFF ) - bra t_pinx2 - -ACOSBIG: - fabs.x %fp0 - fcmp.s %fp0,&0x3F800000 - fbgt t_operr # cause an operr exception - -#--|X| = 1, ACOS(X) = 0 OR PI - tst.b (%a0) # is X positive or negative? 
- bpl.b ACOSP1 - -#--X = -1 -#Returns PI and inexact exception -ACOSM1: - fmov.x PI(%pc),%fp0 # load PI - fmov.l %d0,%fpcr # load round mode,prec - fadd.s &0x00800000,%fp0 # add a small value - bra t_pinx2 - -ACOSP1: - bra ld_pzero # answer is positive zero - - global sacosd -#--ACOS(X) = PI/2 FOR DENORMALIZED X -sacosd: - fmov.l %d0,%fpcr # load user's rnd mode/prec - fmov.x PIBY2(%pc),%fp0 - bra t_pinx2 - -######################################################################### -# setox(): computes the exponential for a normalized input # -# setoxd(): computes the exponential for a denormalized input # -# setoxm1(): computes the exponential minus 1 for a normalized input # -# setoxm1d(): computes the exponential minus 1 for a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = exp(X) or exp(X)-1 # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 0.85 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM and IMPLEMENTATION **************************************** # -# # -# setoxd # -# ------ # -# Step 1. Set ans := 1.0 # -# # -# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. # -# Notes: This will always generate one exception -- inexact. # -# # -# # -# setox # -# ----- # -# # -# Step 1. Filter out extreme cases of input argument. # -# 1.1 If |X| >= 2^(-65), go to Step 1.3. # -# 1.2 Go to Step 7. # -# 1.3 If |X| < 16380 log(2), go to Step 2. # -# 1.4 Go to Step 8. # -# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# -# To avoid the use of floating-point comparisons, a # -# compact representation of |X| is used. This format is a # -# 32-bit integer, the upper (more significant) 16 bits # -# are the sign and biased exponent field of |X|; the # -# lower 16 bits are the 16 most significant fraction # -# (including the explicit bit) bits of |X|. Consequently, # -# the comparisons in Steps 1.1 and 1.3 can be performed # -# by integer comparison. Note also that the constant # -# 16380 log(2) used in Step 1.3 is also in the compact # -# form. Thus taking the branch to Step 2 guarantees # -# |X| < 16380 log(2). There is no harm to have a small # -# number of cases where |X| is less than, but close to, # -# 16380 log(2) and the branch to Step 9 is taken. # -# # -# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # -# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 # -# was taken) # -# 2.2 N := round-to-nearest-integer( X * 64/log2 ). # -# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., # -# or 63. # -# 2.4 Calculate M = (N - J)/64; so N = 64M + J. # -# 2.5 Calculate the address of the stored value of # -# 2^(J/64). # -# 2.6 Create the value Scale = 2^M. # -# Notes: The calculation in 2.2 is really performed by # -# Z := X * constant # -# N := round-to-nearest-integer(Z) # -# where # -# constant := single-precision( 64/log 2 ). # -# # -# Using a single-precision constant avoids memory # -# access. Another effect of using a single-precision # -# "constant" is that the calculated value Z is # -# # -# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). # -# # -# This error has to be considered later in Steps 3 and 4. # -# # -# Step 3. 
Calculate X - N*log2/64. # -# 3.1 R := X + N*L1, # -# where L1 := single-precision(-log2/64). # -# 3.2 R := R + N*L2, # -# L2 := extended-precision(-log2/64 - L1).# -# Notes: a) The way L1 and L2 are chosen ensures L1+L2 # -# approximate the value -log2/64 to 88 bits of accuracy. # -# b) N*L1 is exact because N is no longer than 22 bits # -# and L1 is no longer than 24 bits. # -# c) The calculation X+N*L1 is also exact due to # -# cancellation. Thus, R is practically X+N(L1+L2) to full # -# 64 bits. # -# d) It is important to estimate how large |R| can be # -# after Step 3.2. # -# # -# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # -# X*64/log2 (1+eps) = N + f, |f| <= 0.5 # -# X*64/log2 - N = f - eps*X*64/log2 # -# X - N*log2/64 = f*log2/64 - eps*X # -# # -# # -# Now |X| <= 16446 log2, thus # -# # -# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # -# <= 0.57 log2/64. # -# This bound will be used in Step 4. # -# # -# Step 4. Approximate exp(R)-1 by a polynomial # -# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: A1 (which is 1/2), A4 # -# and A5 are single precision; A2 and A3 are double # -# precision. # -# b) Even with the restrictions above, # -# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # -# Note that 0.0062 is slightly bigger than 0.57 log2/64. # -# c) To fully utilize the pipeline, p is separated into # -# two independent pieces of roughly equal complexity # -# p = [ R + R*S*(A2 + S*A4) ] + # -# [ S*(A1 + S*(A3 + S*A5)) ] # -# where S = R*R. # -# # -# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # -# ans := T + ( T*p + t) # -# where T and t are the stored values for 2^(J/64). # -# Notes: 2^(J/64) is stored as T and t where T+t approximates # -# 2^(J/64) to roughly 85 bits; T is in extended precision # -# and t is in single precision. Note also that T is # -# rounded to 62 bits so that the last two bits of T are # -# zero. The reason for such a special form is that T-1, # -# T-2, and T-8 will all be exact --- a property that will # -# give much more accurate computation of the function # -# EXPM1. # -# # -# Step 6. Reconstruction of exp(X) # -# exp(X) = 2^M * 2^(J/64) * exp(R). # -# 6.1 If AdjFlag = 0, go to 6.3 # -# 6.2 ans := ans * AdjScale # -# 6.3 Restore the user FPCR # -# 6.4 Return ans := ans * Scale. Exit. # -# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, # -# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will # -# neither overflow nor underflow. If AdjFlag = 1, that # -# means that # -# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. # -# Hence, exp(X) may overflow or underflow or neither. # -# When that is the case, AdjScale = 2^(M1) where M1 is # -# approximately M. Thus 6.2 will never cause # -# over/underflow. Possible exception in 6.4 is overflow # -# or underflow. The inexact exception is not generated in # -# 6.4. Although one can argue that the inexact flag # -# should always be raised, simulating that exception # -# costs too much for what the flag is worth in practice. # -# # -# Step 7. Return 1 + X. # -# 7.1 ans := X # -# 7.2 Restore user FPCR. # -# 7.3 Return ans := 1 + ans. Exit # -# Notes: For non-zero X, the inexact exception will always be # -# raised by 7.3. That is the only exception raised by 7.3.# -# Note also that we use the FMOVEM instruction to move X # -# in Step 7.1 to avoid unnecessary trapping.
(Although # -# the FMOVEM may not seem relevant since X is normalized, # -# the precaution will be useful in the library version of # -# this code where the separate entry for denormalized # -# inputs will be done away with.) # -# # -# Step 8. Handle exp(X) where |X| >= 16380log2. # -# 8.1 If |X| > 16480 log2, go to Step 9. # -# (mimic 2.2 - 2.6) # -# 8.2 N := round-to-integer( X * 64/log2 ) # -# 8.3 Calculate J = N mod 64, J = 0,1,...,63 # -# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # -# AdjFlag := 1. # -# 8.5 Calculate the address of the stored value # -# 2^(J/64). # -# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # -# 8.7 Go to Step 3. # -# Notes: Refer to notes for 2.2 - 2.6. # -# # -# Step 9. Handle exp(X), |X| > 16480 log2. # -# 9.1 If X < 0, go to 9.3 # -# 9.2 ans := Huge, go to 9.4 # -# 9.3 ans := Tiny. # -# 9.4 Restore user FPCR. # -# 9.5 Return ans := ans * ans. Exit. # -# Notes: Exp(X) will surely overflow or underflow, depending on # -# X's sign. "Huge" and "Tiny" are respectively large/tiny # -# extended-precision numbers whose square over/underflow # -# with an inexact result. Thus, 9.5 always raises the # -# inexact together with either overflow or underflow. # -# # -# setoxm1d # -# -------- # -# # -# Step 1. Set ans := 0 # -# # -# Step 2. Return ans := X + ans. Exit. # -# Notes: This will return X with the appropriate rounding # -# precision prescribed by the user FPCR. # -# # -# setoxm1 # -# ------- # -# # -# Step 1. Check |X| # -# 1.1 If |X| >= 1/4, go to Step 1.3. # -# 1.2 Go to Step 7. # -# 1.3 If |X| < 70 log(2), go to Step 2. # -# 1.4 Go to Step 10. # -# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# -# However, it is conceivable |X| can be small very often # -# because EXPM1 is intended to evaluate exp(X)-1 # -# accurately when |X| is small. For further details on # -# the comparisons, see the notes on Step 1 of setox. # -# # -# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # -# 2.1 N := round-to-nearest-integer( X * 64/log2 ). # -# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # -# or 63. # -# 2.3 Calculate M = (N - J)/64; so N = 64M + J. # -# 2.4 Calculate the address of the stored value of # -# 2^(J/64). # -# 2.5 Create the values Sc = 2^M and # -# OnebySc := -2^(-M). # -# Notes: See the notes on Step 2 of setox. # -# # -# Step 3. Calculate X - N*log2/64. # -# 3.1 R := X + N*L1, # -# where L1 := single-precision(-log2/64). # -# 3.2 R := R + N*L2, # -# L2 := extended-precision(-log2/64 - L1).# -# Notes: Applying the analysis of Step 3 of setox in this case # -# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # -# this case). # -# # -# Step 4. Approximate exp(R)-1 by a polynomial # -# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: A1 (which is 1/2), A5 # -# and A6 are single precision; A2, A3 and A4 are double # -# precision. # -# b) Even with the restriction above, # -# |p - (exp(R)-1)| < |R| * 2^(-72.7) # -# for all |R| <= 0.0055. # -# c) To fully utilize the pipeline, p is separated into # -# two independent pieces of roughly equal complexity # -# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # -# [ R + S*(A1 + S*(A3 + S*A5)) ] # -# where S = R*R. # -# # -# Step 5. Compute 2^(J/64)*p by # -# p := T*p # -# where T and t are the stored values for 2^(J/64). # -# Notes: 2^(J/64) is stored as T and t where T+t approximates # -# 2^(J/64) to roughly 85 bits; T is in extended precision # -# and t is in single precision. 
Note also that T is # -# rounded to 62 bits so that the last two bits of T are # -# zero. The reason for such a special form is that T-1, # -# T-2, and T-8 will all be exact --- a property that will # -# be exploited in Step 6 below. The total relative error # -# in p is no bigger than 2^(-67.7) compared to the final # -# result. # -# # -# Step 6. Reconstruction of exp(X)-1 # -# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # -# 6.1 If M <= 63, go to Step 6.3. # -# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # -# 6.3 If M >= -3, go to 6.5. # -# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # -# 6.5 ans := (T + OnebySc) + (p + t). # -# 6.6 Restore user FPCR. # -# 6.7 Return ans := Sc * ans. Exit. # -# Notes: The various arrangements of the expressions give # -# accurate evaluations. # -# # -# Step 7. exp(X)-1 for |X| < 1/4. # -# 7.1 If |X| >= 2^(-65), go to Step 9. # -# 7.2 Go to Step 8. # -# # -# Step 8. Calculate exp(X)-1, |X| < 2^(-65). # -# 8.1 If |X| < 2^(-16312), goto 8.3 # -# 8.2 Restore FPCR; return ans := X - 2^(-16382). # -# Exit. # -# 8.3 X := X * 2^(140). # -# 8.4 Restore FPCR; ans := ans - 2^(-16382). # -# Return ans := ans*2^(140). Exit # -# Notes: The idea is to return "X - tiny" under the user # -# precision and rounding modes. To avoid unnecessary # -# inefficiency, we stay away from denormalized numbers # -# the best we can. For |X| >= 2^(-16312), the # -# straightforward 8.2 generates the inexact exception as # -# the case warrants. # -# # -# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # -# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # -# Notes: a) In order to reduce memory access, the coefficients # -# are made as "short" as possible: B1 (which is 1/2), B9 # -# to B12 are single precision; B3 to B8 are double # -# precision; and B2 is double extended. # -# b) Even with the restriction above, # -# |p - (exp(X)-1)| < |X| 2^(-70.6) # -# for all |X| <= 0.251. # -# Note that 0.251 is slightly bigger than 1/4. # -# c) To fully preserve accuracy, the polynomial is # -# computed as # -# X + ( S*B1 + Q ) where S = X*X and # -# Q = X*S*(B2 + X*(B3 + ... + X*B12)) # -# d) To fully utilize the pipeline, Q is separated into # -# two independent pieces of roughly equal complexity # -# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # -# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] # -# # -# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # -# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # -# practical purposes. Therefore, go to Step 1 of setox. # -# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # -# purposes. # -# ans := -1 # -# Restore user FPCR # -# Return ans := ans + 2^(-126). Exit. # -# Notes: 10.2 will always create an inexact and return -1 + tiny # -# in the user rounding precision and mode. 
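Steps 2-6 of setox above reduce to a few lines of C once the table and the polynomial are replaced by libm calls. The sketch below is a model, not the emulator's arithmetic: exp2() stands in for the 64-entry 2^(J/64) table (EEXPTBL), expm1() for the degree-5 polynomial, the L1/L2 split is recomputed generically rather than taken from the stored constants, and the helper name exp_by_table is invented here.

	#include <math.h>

	/* exp(x) = 2^M * 2^(J/64) * exp(R), with R = x - N*log2/64. */
	static double exp_by_table(double x)
	{
		long n = lrint(x * 64.0 / M_LN2);	/* Step 2: N = rnd-to-int */
		long j = n & 63;			/* J = N mod 64 */
		long m = (n - j) / 64;			/* M: N = 64M + J */

		/* Step 3: two-piece reduction.  l1 is a short head of
		 * -log2/64, so n*l1 is exact; l2 carries the tail. */
		double l1 = (double)(float)(-M_LN2 / 64.0);
		double l2 = -M_LN2 / 64.0 - l1;
		double r = (x + n * l1) + n * l2;

		/* Steps 4-6: exp(R)-1, table value, scale by 2^M. */
		return ldexp(exp2((double)j / 64.0) * (1.0 + expm1(r)), (int)m);
	}

setoxm1 follows the same reduction but assembles T + p + t against -2^(-M) (OnebySc) in an order chosen by the size of M, which is what Steps 6.2-6.5 above spell out.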
# -# # -######################################################################### - -L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 - -EEXPA3: long 0x3FA55555,0x55554CC1 -EEXPA2: long 0x3FC55555,0x55554A54 - -EM1A4: long 0x3F811111,0x11174385 -EM1A3: long 0x3FA55555,0x55554F5A - -EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 - -EM1B8: long 0x3EC71DE3,0xA5774682 -EM1B7: long 0x3EFA01A0,0x19D7CB68 - -EM1B6: long 0x3F2A01A0,0x1A019DF3 -EM1B5: long 0x3F56C16C,0x16C170E2 - -EM1B4: long 0x3F811111,0x11111111 -EM1B3: long 0x3FA55555,0x55555555 - -EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB - long 0x00000000 - -TWO140: long 0x48B00000,0x00000000 -TWON140: - long 0x37300000,0x00000000 - -EEXPTBL: - long 0x3FFF0000,0x80000000,0x00000000,0x00000000 - long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B - long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 - long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 - long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C - long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F - long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 - long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF - long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF - long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA - long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 - long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 - long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 - long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 - long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D - long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 - long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD - long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 - long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 - long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D - long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 - long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C - long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 - long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 - long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 - long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA - long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A - long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC - long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC - long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 - long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 - long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A - long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 - long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 - long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC - long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 - long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 - long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 - long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 - long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B - long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 - long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E - long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 - long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D - long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 - long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C - long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 - long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 - long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F - long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F - long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 - long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 - long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B - long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 - long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A - long 
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 - long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 - long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B - long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 - long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 - long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 - long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 - long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 - long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A - - set ADJFLAG,L_SCR2 - set SCALE,FP_SCR0 - set ADJSCALE,FP_SCR1 - set SC,FP_SCR0 - set ONEBYSC,FP_SCR1 - - global setox -setox: -#--entry point for EXP(X), here X is finite, non-zero, and not NaN's - -#--Step 1. - mov.l (%a0),%d1 # load part of input X - and.l &0x7FFF0000,%d1 # biased expo. of X - cmp.l %d1,&0x3FBE0000 # 2^(-65) - bge.b EXPC1 # normal case - bra EXPSM - -EXPC1: -#--The case |X| >= 2^(-65) - mov.w 4(%a0),%d1 # expo. and partial sig. of |X| - cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits - blt.b EXPMAIN # normal case - bra EEXPBIG - -EXPMAIN: -#--Step 2. -#--This is the normal branch: 2^(-65) <= |X| < 16380 log2. - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - mov.l &0,ADJFLAG(%a6) - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M) - mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB - -EXPCONT1: -#--Step 3. -#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M) - fmov.x %fp0,%fp2 - fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) - fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 - fadd.x %fp1,%fp0 # X + N*L1 - fadd.x %fp2,%fp0 # fp0 is R, reduced arg. - -#--Step 4. -#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) -#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # fp1 IS S = R*R - - fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 - - fmul.x %fp1,%fp2 # fp2 IS S*A5 - fmov.x %fp1,%fp3 - fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 - - fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 - fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 - - fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) - mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended - mov.l &0x80000000,SCALE+4(%a6) - clr.l SCALE+8(%a6) - - fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) - - fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5) - fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) - - fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) - fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4), - - fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64) - fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 - -#--Step 5 -#--final reconstruction process -#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) - - fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - fadd.s (%a1),%fp0 # accurate 2^(J/64) - - fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... 
- mov.l ADJFLAG(%a6),%d1 - -#--Step 6 - tst.l %d1 - beq.b NORMAL -ADJUST: - fmul.x ADJSCALE(%a6),%fp0 -NORMAL: - fmov.l %d0,%fpcr # restore user FPCR - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x SCALE(%a6),%fp0 # multiply 2^(M) - bra t_catch - -EXPSM: -#--Step 7 - fmovm.x (%a0),&0x80 # load X - fmov.l %d0,%fpcr - fadd.s &0x3F800000,%fp0 # 1+X in user mode - bra t_pinx2 - -EEXPBIG: -#--Step 8 - cmp.l %d1,&0x400CB27C # 16480 log2 - bgt.b EXP2BIG -#--Steps 8.2 -- 8.6 - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - mov.l &1,ADJFLAG(%a6) - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is K - mov.l %d1,L_SCR1(%a6) # save K temporarily - asr.l &1,%d1 # D0 is M1 - sub.l %d1,L_SCR1(%a6) # a1 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M1) - mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) - mov.l &0x80000000,ADJSCALE+4(%a6) - clr.l ADJSCALE+8(%a6) - mov.l L_SCR1(%a6),%d1 # D0 is M - add.w &0x3FFF,%d1 # biased expo. of 2^(M) - bra.w EXPCONT1 # go back to Step 3 - -EXP2BIG: -#--Step 9 - tst.b (%a0) # is X positive or negative? - bmi t_unfl2 - bra t_ovfl2 - - global setoxd -setoxd: -#--entry point for EXP(X), X is denormalized - mov.l (%a0),-(%sp) - andi.l &0x80000000,(%sp) - ori.l &0x00800000,(%sp) # sign(X)*2^(-126) - - fmov.s &0x3F800000,%fp0 - - fmov.l %d0,%fpcr - fadd.s (%sp)+,%fp0 - bra t_pinx2 - - global setoxm1 -setoxm1: -#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN - -#--Step 1. -#--Step 1.1 - mov.l (%a0),%d1 # load part of input X - and.l &0x7FFF0000,%d1 # biased expo. of X - cmp.l %d1,&0x3FFD0000 # 1/4 - bge.b EM1CON1 # |X| >= 1/4 - bra EM1SM - -EM1CON1: -#--Step 1.3 -#--The case |X| >= 1/4 - mov.w 4(%a0),%d1 # expo. and partial sig. of |X| - cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits - ble.b EM1MAIN # 1/4 <= |X| <= 70log2 - bra EM1BIG - -EM1MAIN: -#--Step 2. -#--This is the case: 1/4 <= |X| <= 70 log2. - fmov.x (%a0),%fp0 # load input from (a0) - - fmov.x %fp0,%fp1 - fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - fmov.l %fp0,%d1 # N = int( X * 64/log2 ) - lea EEXPTBL(%pc),%a1 - fmov.l %d1,%fp0 # convert to floating-format - - mov.l %d1,L_SCR1(%a6) # save N temporarily - and.l &0x3F,%d1 # D0 is J = N mod 64 - lsl.l &4,%d1 - add.l %d1,%a1 # address of 2^(J/64) - mov.l L_SCR1(%a6),%d1 - asr.l &6,%d1 # D0 is M - mov.l %d1,L_SCR1(%a6) # save a copy of M - -#--Step 3. -#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X, -#--a0 points to 2^(J/64), D0 and a1 both contain M - fmov.x %fp0,%fp2 - fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) - fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 - fadd.x %fp1,%fp0 # X + N*L1 - fadd.x %fp2,%fp0 # fp0 is R, reduced arg. - add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M - -#--Step 4. 
-#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL -#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) -#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R -#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # fp1 IS S = R*R - - fmov.s &0x3950097B,%fp2 # fp2 IS a6 - - fmul.x %fp1,%fp2 # fp2 IS S*A6 - fmov.x %fp1,%fp3 - fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 - - fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 - fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 - mov.w %d1,SC(%a6) # SC is 2^(M) in extended - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - - fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) - mov.l L_SCR1(%a6),%d1 # D0 is M - neg.w %d1 # D0 is -M - fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) - add.w &0x3FFF,%d1 # biased expo. of 2^(-M) - fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) - fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5) - - fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) - or.w &0x8000,%d1 # signed/expo. of -2^(-M) - mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) - mov.l &0x80000000,ONEBYSC+4(%a6) - clr.l ONEBYSC+8(%a6) - fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) - - fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) - fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) - - fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1 - - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - -#--Step 5 -#--Compute 2^(J/64)*p - - fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1) - -#--Step 6 -#--Step 6.1 - mov.l L_SCR1(%a6),%d1 # retrieve M - cmp.l %d1,&63 - ble.b MLE63 -#--Step 6.2 M >= 64 - fmov.s 12(%a1),%fp1 # fp1 is t - fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc - fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released - fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) - bra EM1SCALE -MLE63: -#--Step 6.3 M <= 63 - cmp.l %d1,&-3 - bge.b MGEN3 -MLTN3: -#--Step 6.4 M <= -4 - fadd.s 12(%a1),%fp0 # p+t - fadd.x (%a1),%fp0 # T+(p+t) - fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) - bra EM1SCALE -MGEN3: -#--Step 6.5 -3 <= M <= 63 - fmov.x (%a1)+,%fp1 # fp1 is T - fadd.s (%a1),%fp0 # fp0 is p+t - fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc - fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) - -EM1SCALE: -#--Step 6.6 - fmov.l %d0,%fpcr - fmul.x SC(%a6),%fp0 - bra t_inx2 - -EM1SM: -#--Step 7 |X| < 1/4. - cmp.l %d1,&0x3FBE0000 # 2^(-65) - bge.b EM1POLY - -EM1TINY: -#--Step 8 |X| < 2^(-65) - cmp.l %d1,&0x00330000 # 2^(-16312) - blt.b EM12TINY -#--Step 8.2 - mov.l &0x80010000,SC(%a6) # SC is -2^(-16382) - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - fmov.x (%a0),%fp0 - fmov.l %d0,%fpcr - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x SC(%a6),%fp0 - bra t_catch - -EM12TINY: -#--Step 8.3 - fmov.x (%a0),%fp0 - fmul.d TWO140(%pc),%fp0 - mov.l &0x80010000,SC(%a6) - mov.l &0x80000000,SC+4(%a6) - clr.l SC+8(%a6) - fadd.x SC(%a6),%fp0 - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.d TWON140(%pc),%fp0 - bra t_catch - -EM1POLY: -#--Step 9 exp(X)-1 by a simple polynomial - fmov.x (%a0),%fp0 # fp0 is X - fmul.x %fp0,%fp0 # fp0 is S := X*X - fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3} - fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 - fmul.x %fp0,%fp1 # fp1 is S*B12 - fmov.s &0x310F8290,%fp2 # fp2 is B11 - fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 - - fmul.x %fp0,%fp2 # fp2 is S*B11 - fmul.x %fp0,%fp1 # fp1 is S*(B10 + ... - - fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... - fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B9+... - fmul.x %fp0,%fp1 # fp1 is S*(B8+... - - fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... - fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B7+... - fmul.x %fp0,%fp1 # fp1 is S*(B6+... 
- - fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... - fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B5+... - fmul.x %fp0,%fp1 # fp1 is S*(B4+... - - fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... - fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... - - fmul.x %fp0,%fp2 # fp2 is S*(B3+... - fmul.x %fp0,%fp1 # fp1 is S*(B2+... - - fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) - fmul.x (%a0),%fp1 # fp1 is X*S*(B2... - - fmul.s &0x3F000000,%fp0 # fp0 is S*B1 - fadd.x %fp2,%fp1 # fp1 is Q - - fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3} - - fadd.x %fp1,%fp0 # fp0 is S*B1+Q - - fmov.l %d0,%fpcr - fadd.x (%a0),%fp0 - bra t_inx2 - -EM1BIG: -#--Step 10 |X| > 70 log2 - mov.l (%a0),%d1 - cmp.l %d1,&0 - bgt.w EXPC1 -#--Step 10.2 - fmov.s &0xBF800000,%fp0 # fp0 is -1 - fmov.l %d0,%fpcr - fadd.s &0x00800000,%fp0 # -1 + 2^(-126) - bra t_minx2 - - global setoxm1d -setoxm1d: -#--entry point for EXPM1(X), here X is denormalized -#--Step 0. - bra t_extdnrm - -######################################################################### -# sgetexp(): returns the exponent portion of the input argument. # -# The exponent bias is removed and the exponent value is # -# returned as an extended precision number in fp0. # -# sgetexpd(): handles denormalized numbers. # -# # -# sgetman(): extracts the mantissa of the input argument. The # -# mantissa is converted to an extended precision number w/ # -# an exponent of $3fff and is returned in fp0. The range of # -# the result is [1.0 - 2.0). # -# sgetmand(): handles denormalized numbers. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# # -# OUTPUT ************************************************************** # -# fp0 = exponent(X) or mantissa(X) # -# # -######################################################################### - - global sgetexp -sgetexp: - mov.w SRC_EX(%a0),%d0 # get the exponent - bclr &0xf,%d0 # clear the sign bit - subi.w &0x3fff,%d0 # subtract off the bias - fmov.w %d0,%fp0 # return exp in fp0 - blt.b sgetexpn # it's negative - rts - -sgetexpn: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - - global sgetexpd -sgetexpd: - bsr.l norm # normalize - neg.w %d0 # new exp = -(shft amt) - subi.w &0x3fff,%d0 # subtract off the bias - fmov.w %d0,%fp0 # return exp in fp0 - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - - global sgetman -sgetman: - mov.w SRC_EX(%a0),%d0 # get the exp - ori.w &0x7fff,%d0 # clear old exp - bclr &0xe,%d0 # make it the new exp +-3fff - -# here, we build the result in a tmp location so as not to disturb the input - mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc - mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc - mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent - fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0 - bmi.b sgetmann # it's negative - rts - -sgetmann: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit - rts - -# -# For denormalized numbers, shift the mantissa until the j-bit = 1, -# then load the exponent with +/1 $3fff. 
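sgetexp()/sgetman() above are pure field manipulations: sgetexp strips the bias from the exponent field and returns it as a floating-point value, while sgetman forces the biased exponent to $3fff (keeping the sign bit) so the significand comes back with magnitude in [1.0, 2.0); sgetmand, which follows, normalizes a denorm first and falls into sgetman. A rough C analogue, assuming a finite nonzero double in place of the 80-bit extended operand, with frexp() doing the field surgery and invented helper names:

	#include <math.h>

	/* x = f * 2^e with |f| in [0.5,1), i.e. x = (2f) * 2^(e-1). */
	static double getexp(double x)
	{
		int e;
		frexp(x, &e);
		return (double)(e - 1);	/* unbiased exponent as a FP value */
	}

	static double getman(double x)
	{
		int e;
		return 2.0 * frexp(x, &e); /* signed mantissa, |.| in [1,2) */
	}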
-# - global sgetmand -sgetmand: - bsr.l norm # normalize exponent - bra.b sgetman - -######################################################################### -# scosh(): computes the hyperbolic cosine of a normalized input # -# scoshd(): computes the hyperbolic cosine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = cosh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# COSH # -# 1. If |X| > 16380 log2, go to 3. # -# # -# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # -# y = |X|, z = exp(y), and # -# cosh(X) = (1/2)*( z + 1/z ). # -# Exit. # -# # -# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # -# # -# 4. (16380 log2 < |X| <= 16480 log2) # -# cosh(X) = exp(|X|)/2. # -# However, invoking exp(|X|) may cause premature # -# overflow. Thus, we calculate cosh(X) as follows: # -# Y := |X| # -# Fact := 2**(16380) # -# Y' := Y - 16381 log2 # -# cosh(X) := Fact * exp(Y'). # -# Exit. # -# # -# 5. (|X| > 16480 log2) cosh(X) must overflow. Return # -# Huge*Huge to generate overflow and a positive # -# infinity. Huge is the largest finite number # -# in extended format. Exit. # -# # -######################################################################### - -TWO16380: - long 0x7FFB0000,0x80000000,0x00000000,0x00000000 - - global scosh -scosh: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x400CB167 - bgt.b COSHBIG - -#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) - - fabs.x %fp0 # |X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save |X| to stack - lea (%sp),%a0 # pass ptr to |X| - bsr setox # FP0 IS EXP(|X|) - add.l &0xc,%sp # erase |X| from stack - fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|) - mov.l (%sp)+,%d0 - - fmov.s &0x3E800000,%fp1 # (1/4) - fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|)) - - fmov.l %d0,%fpcr - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x %fp1,%fp0 - bra t_catch - -COSHBIG: - cmp.l %d1,&0x400CB2B3 - bgt.b COSHHUGE - - fabs.x %fp0 - fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) - fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save fp0 to stack - lea (%sp),%a0 # pass ptr to fp0 - bsr setox - add.l &0xc,%sp # clear fp0 from stack - mov.l (%sp)+,%d0 - - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x TWO16380(%pc),%fp0 - bra t_catch - -COSHHUGE: - bra t_ovfl2 - - global scoshd -#--COSH(X) = 1 FOR DENORMALIZED X -scoshd: - fmov.s &0x3F800000,%fp0 - - fmov.l %d0,%fpcr - fadd.s &0x00800000,%fp0 - bra t_pinx2 - -######################################################################### -# ssinh(): computes the hyperbolic sine of a normalized input # -# ssinhd(): computes the hyperbolic sine of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT
************************************************************** # -# fp0 = sinh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# SINH # -# 1. If |X| > 16380 log2, go to 3. # -# # -# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # -# y = |X|, sgn = sign(X), and z = expm1(Y), # -# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # -# Exit. # -# # -# 3. If |X| > 16480 log2, go to 5. # -# # -# 4. (16380 log2 < |X| <= 16480 log2) # -# sinh(X) = sign(X) * exp(|X|)/2. # -# However, invoking exp(|X|) may cause premature overflow. # -# Thus, we calculate sinh(X) as follows: # -# Y := |X| # -# sgn := sign(X) # -# sgnFact := sgn * 2**(16380) # -# Y' := Y - 16381 log2 # -# sinh(X) := sgnFact * exp(Y'). # -# Exit. # -# # -# 5. (|X| > 16480 log2) sinh(X) must overflow. Return # -# sign(X)*Huge*Huge to generate overflow and an infinity with # -# the appropriate sign. Huge is the largest finite number in # -# extended format. Exit. # -# # -######################################################################### - - global ssinh -ssinh: - fmov.x (%a0),%fp0 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - mov.l %d1,%a1 # save (compacted) operand - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x400CB167 - bgt.b SINHBIG - -#--THIS IS THE USUAL CASE, |X| < 16380 LOG2 -#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) - - fabs.x %fp0 # Y = |X| - - movm.l &0x8040,-(%sp) # {a1/d0} - fmovm.x &0x01,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - clr.l %d0 - bsr setoxm1 # FP0 IS Z = EXPM1(Y) - add.l &0xc,%sp # clear Y from stack - fmov.l &0,%fpcr - movm.l (%sp)+,&0x0201 # {a1/d0} - - fmov.x %fp0,%fp1 - fadd.s &0x3F800000,%fp1 # 1+Z - fmov.x %fp0,-(%sp) - fdiv.x %fp1,%fp0 # Z/(1+Z) - mov.l %a1,%d1 - and.l &0x80000000,%d1 - or.l &0x3F000000,%d1 - fadd.x (%sp)+,%fp0 - mov.l %d1,-(%sp) - - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set - bra t_catch - -SINHBIG: - cmp.l %d1,&0x400CB2B3 - bgt t_ovfl - fabs.x %fp0 - fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD) - mov.l &0,-(%sp) - mov.l &0x80000000,-(%sp) - mov.l %a1,%d1 - and.l &0x80000000,%d1 - or.l &0x7FFB0000,%d1 - mov.l %d1,-(%sp) # EXTENDED FMT - fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save fp0 on stack - lea (%sp),%a0 # pass ptr to fp0 - bsr setox - add.l &0xc,%sp # clear fp0 from stack - - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x (%sp)+,%fp0 # possible exception - bra t_catch - - global ssinhd -#--SINH(X) = X FOR DENORMALIZED X -ssinhd: - bra t_extdnrm - -######################################################################### -# stanh(): computes the hyperbolic tangent of a normalized input # -# stanhd(): computes the hyperbolic tangent of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = tanh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is 
within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# TANH # -# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # -# # -# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # -# sgn := sign(X), y := 2|X|, z := expm1(Y), and # -# tanh(X) = sgn*( z/(2+z) ). # -# Exit. # -# # -# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # -# go to 7. # -# # -# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # -# # -# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # -# sgn := sign(X), y := 2|X|, z := exp(Y), # -# tanh(X) = sgn - [ sgn*2/(1+z) ]. # -# Exit. # -# # -# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # -# calculate Tanh(X) by # -# sgn := sign(X), Tiny := 2**(-126), # -# tanh(X) := sgn - sgn*Tiny. # -# Exit. # -# # -# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. # -# # -######################################################################### - - set X,FP_SCR0 - set XFRAC,X+4 - - set SGN,L_SCR3 - - set V,FP_SCR0 - - global stanh -stanh: - fmov.x (%a0),%fp0 # LOAD INPUT - - fmov.x %fp0,X(%a6) - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - mov.l %d1,X(%a6) - and.l &0x7FFFFFFF,%d1 - cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? - blt.w TANHBORS # yes - cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? - bgt.w TANHBORS # yes - -#--THIS IS THE USUAL CASE -#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). - - mov.l X(%a6),%d1 - mov.l %d1,SGN(%a6) - and.l &0x7FFF0000,%d1 - add.l &0x00010000,%d1 # EXPONENT OF 2|X| - mov.l %d1,X(%a6) - and.l &0x80000000,SGN(%a6) - fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x1,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - bsr setoxm1 # FP0 IS Z = EXPM1(Y) - add.l &0xc,%sp # clear Y from stack - mov.l (%sp)+,%d0 - - fmov.x %fp0,%fp1 - fadd.s &0x40000000,%fp1 # Z+2 - mov.l SGN(%a6),%d1 - fmov.x %fp1,V(%a6) - eor.l %d1,V(%a6) - - fmov.l %d0,%fpcr # restore users round prec,mode - fdiv.x V(%a6),%fp0 - bra t_inx2 - -TANHBORS: - cmp.l %d1,&0x3FFF8000 - blt.w TANHSM - - cmp.l %d1,&0x40048AA1 - bgt.w TANHHUGE - -#-- (5/2) LOG2 < |X| < 50 LOG2, -#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), -#--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. 
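The three live branches of stanh (Steps 2, 5, and 6/7 above) map directly onto C. In this hedged sketch, libm's expm1()/exp() stand in for setoxm1()/setox(), the thresholds are the round numbers named in the algorithm rather than the compact-format constants tested in the code, and the function name tanh_sketch is invented:

	#include <math.h>

	static double tanh_sketch(double x)
	{
		double ax = fabs(x);
		double sgn = (x < 0.0) ? -1.0 : 1.0;

		if (ax < 0x1.0p-40)		/* Step 7: tanh(X) = X */
			return x;
		if (ax < 2.5 * M_LN2) {		/* Step 2: z = expm1(2|X|) */
			double z = expm1(2.0 * ax);
			return sgn * z / (2.0 + z);
		}
		if (ax < 50.0 * M_LN2)		/* Step 5: sgn - sgn*2/(exp(2|X|)+1) */
			return sgn - sgn * 2.0 / (exp(2.0 * ax) + 1.0);
		return sgn;			/* Step 6: +-1 (minus a tiny, to
						 * raise inexact, in the real code) */
	}

Both nontrivial branches are the same algebra: tanh(x) = (e^y - 1)/(e^y + 1) with y = 2|x|, written once in terms of expm1 (accurate for small y) and once in terms of exp (safe from cancellation for large y).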
- - mov.l X(%a6),%d1 - mov.l %d1,SGN(%a6) - and.l &0x7FFF0000,%d1 - add.l &0x00010000,%d1 # EXPO OF 2|X| - mov.l %d1,X(%a6) # Y = 2|X| - and.l &0x80000000,SGN(%a6) - mov.l SGN(%a6),%d1 - fmov.x X(%a6),%fp0 # Y = 2|X| - - mov.l %d0,-(%sp) - clr.l %d0 - fmovm.x &0x01,-(%sp) # save Y on stack - lea (%sp),%a0 # pass ptr to Y - bsr setox # FP0 IS EXP(Y) - add.l &0xc,%sp # clear Y from stack - mov.l (%sp)+,%d0 - mov.l SGN(%a6),%d1 - fadd.s &0x3F800000,%fp0 # EXP(Y)+1 - - eor.l &0xC0000000,%d1 # -SIGN(X)*2 - fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT - fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ] - - mov.l SGN(%a6),%d1 - or.l &0x3F800000,%d1 # SGN - fmov.s %d1,%fp0 # SGN IN SGL FMT - - fmov.l %d0,%fpcr # restore users round prec,mode - mov.b &FADD_OP,%d1 # last inst is ADD - fadd.x %fp1,%fp0 - bra t_inx2 - -TANHSM: - fmov.l %d0,%fpcr # restore users round prec,mode - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x X(%a6),%fp0 # last inst - possible exception set - bra t_catch - -#---RETURN SGN(X) - SGN(X)EPS -TANHHUGE: - mov.l X(%a6),%d1 - and.l &0x80000000,%d1 - or.l &0x3F800000,%d1 - fmov.s %d1,%fp0 - and.l &0x80000000,%d1 - eor.l &0x80800000,%d1 # -SIGN(X)*EPS - - fmov.l %d0,%fpcr # restore users round prec,mode - fadd.s %d1,%fp0 - bra t_inx2 - - global stanhd -#--TANH(X) = X FOR DENORMALIZED X -stanhd: - bra t_extdnrm - -######################################################################### -# slogn(): computes the natural logarithm of a normalized input # -# slognd(): computes the natural logarithm of a denormalized input # -# slognp1(): computes the log(1+X) of a normalized input # -# slognp1d(): computes the log(1+X) of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = log(X) or log(1+X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# LOGN: # -# Step 1. If |X-1| < 1/16, approximate log(X) by an odd # -# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # -# move on to Step 2. # -# # -# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # -# seven significant bits of Y plus 2**(-7), i.e. # -# F = 1.xxxxxx1 in base 2 where the six "x" match those # -# of Y. Note that |Y-F| <= 2**(-7). # -# # -# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # -# polynomial in u, log(1+u) = poly. # -# # -# Step 4. Reconstruct # -# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # -# by k*log(2) + (log(F) + poly). The values of log(F) are # -# calculated beforehand and stored in the program. # -# # -# lognp1: # -# Step 1: If |X| < 1/16, approximate log(1+X) by an odd # -# polynomial in u where u = 2X/(2+X). Otherwise, move on # -# to Step 2. # -# # -# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # -# in Step 2 of the algorithm for LOGN and compute # -# log(1+X) as k*log(2) + log(F) + poly where poly # -# approximates log(1+u), u = (Y-F)/F. # -# # -# Implementation Notes: # -# Note 1. There are 64 different possible values for F, thus 64 # -# log(F)'s need to be tabulated. 
Moreover, the values of # -# 1/F are also tabulated so that the division in (Y-F)/F # -# can be performed by a multiplication. # -# # -# Note 2. In Step 2 of lognp1, in order to preserved accuracy, # -# the value Y-F has to be calculated carefully when # -# 1/2 <= X < 3/2. # -# # -# Note 3. To fully exploit the pipeline, polynomials are usually # -# separated into two parts evaluated independently before # -# being added up. # -# # -######################################################################### -LOGOF2: - long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -one: - long 0x3F800000 -zero: - long 0x00000000 -infty: - long 0x7F800000 -negone: - long 0xBF800000 - -LOGA6: - long 0x3FC2499A,0xB5E4040B -LOGA5: - long 0xBFC555B5,0x848CB7DB - -LOGA4: - long 0x3FC99999,0x987D8730 -LOGA3: - long 0xBFCFFFFF,0xFF6F7E97 - -LOGA2: - long 0x3FD55555,0x555555A4 -LOGA1: - long 0xBFE00000,0x00000008 - -LOGB5: - long 0x3F175496,0xADD7DAD6 -LOGB4: - long 0x3F3C71C2,0xFE80C7E0 - -LOGB3: - long 0x3F624924,0x928BCCFF -LOGB2: - long 0x3F899999,0x999995EC - -LOGB1: - long 0x3FB55555,0x55555555 -TWO: - long 0x40000000,0x00000000 - -LTHOLD: - long 0x3f990000,0x80000000,0x00000000,0x00000000 - -LOGTBL: - long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 - long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 - long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 - long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 - long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 - long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 - long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 - long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 - long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 - long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 - long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 - long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 - long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 - long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 - long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 - long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 - long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 - long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 - long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 - long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 - long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 - long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 - long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 - long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 - long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 - long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 - long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 - long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 - long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 - long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 - long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 - long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 - long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 - long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 - long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 - long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 - long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 - long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 - long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 - long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 - long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 - long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 - long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 - long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 - long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 - long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 - long 
0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 - long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 - long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 - long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 - long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 - long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 - long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 - long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 - long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 - long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 - long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 - long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 - long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 - long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 - long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 - long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 - long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 - long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 - long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 - long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 - long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 - long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 - long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 - long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 - long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 - long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 - long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 - long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 - long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 - long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 - long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 - long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 - long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 - long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 - long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 - long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 - long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 - long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 - long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 - long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 - long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 - long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 - long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 - long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 - long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 - long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 - long 0x3FFE0000,0x94458094,0x45809446,0x00000000 - long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 - long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 - long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 - long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 - long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 - long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 - long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 - long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 - long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 - long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 - long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 - long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 - long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 - long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 - long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 - long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 - long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 - long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 - long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 - long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 - long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 - long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 - long 
0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 - long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 - long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 - long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 - long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 - long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 - long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 - long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 - long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 - long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 - long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 - long 0x3FFE0000,0x80808080,0x80808081,0x00000000 - long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 - - set ADJK,L_SCR1 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - - set F,FP_SCR1 - set FFRAC,F+4 - - set KLOG2,FP_SCR0 - - set SAVEU,FP_SCR0 - - global slogn -#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S -slogn: - fmov.x (%a0),%fp0 # LOAD INPUT - mov.l &0x00000000,ADJK(%a6) - -LOGBGN: -#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS -#--A FINITE, NON-ZERO, NORMALIZED NUMBER. - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - - mov.l (%a0),X(%a6) - mov.l 4(%a0),X+4(%a6) - mov.l 8(%a0),X+8(%a6) - - cmp.l %d1,&0 # CHECK IF X IS NEGATIVE - blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID -# X IS POSITIVE, CHECK IF X IS NEAR 1 - cmp.l %d1,&0x3ffef07d # IS X < 15/16? - blt.b LOGMAIN # YES - cmp.l %d1,&0x3fff8841 # IS X > 17/16? - ble.w LOGNEAR1 # NO - -LOGMAIN: -#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 - -#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. -#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. -#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) -#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). -#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING -#--LOG(1+U) CAN BE VERY EFFICIENT. -#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO -#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. - -#--GET K, Y, F, AND ADDRESS OF 1/F. - asr.l &8,%d1 - asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X - sub.l &0x3FFF,%d1 # THIS IS K - add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM. - lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F) - fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT - -#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F - mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 
2^(-K)*X - mov.l XFRAC(%a6),FFRAC(%a6) - and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y - or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT - mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 # SHIFTED 20, D0 IS THE DISPLACEMENT - add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F - - fmov.x X(%a6),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # Y-F - fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY -#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K -#--REGISTERS SAVED: FPCR, FP1, FP2 - -LP1CONT1: -#--A RE-ENTRY POINT FOR LOGNP1 - fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F - fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY - fmov.x %fp0,%fp2 - fmul.x %fp2,%fp2 # FP2 IS V=U*U - fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1 - -#--LOG(1+U) IS APPROXIMATED BY -#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS -#--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] - - fmov.x %fp2,%fp3 - fmov.x %fp2,%fp1 - - fmul.d LOGA6(%pc),%fp1 # V*A6 - fmul.d LOGA5(%pc),%fp2 # V*A5 - - fadd.d LOGA4(%pc),%fp1 # A4+V*A6 - fadd.d LOGA3(%pc),%fp2 # A3+V*A5 - - fmul.x %fp3,%fp1 # V*(A4+V*A6) - fmul.x %fp3,%fp2 # V*(A3+V*A5) - - fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6) - fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5) - - fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6)) - add.l &16,%a0 # ADDRESS OF LOG(F) - fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5)) - - fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6)) - fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5)) - - fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6)) - fmovm.x (%sp)+,&0x30 # RESTORE FP2-3 - fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U) - - fmov.l %d0,%fpcr - fadd.x KLOG2(%a6),%fp0 # FINAL ADD - bra t_inx2 - - -LOGNEAR1: - -# if the input is exactly equal to one, then exit through ld_pzero. -# if these 2 lines weren't here, the correct answer would be returned -# but the INEX2 bit would be set. - fcmp.b %fp0,&0x1 # is it equal to one? - fbeq.l ld_pzero # yes - -#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. - fmov.x %fp0,%fp1 - fsub.s one(%pc),%fp1 # FP1 IS X-1 - fadd.s one(%pc),%fp0 # FP0 IS X+1 - fadd.x %fp1,%fp1 # FP1 IS 2(X-1) -#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL -#--IN U, U = 2(X-1)/(X+1) = FP1/FP0 - -LP1CONT2: -#--THIS IS A RE-ENTRY POINT FOR LOGNP1 - fdiv.x %fp0,%fp1 # FP1 IS U - fmovm.x &0xc,-(%sp) # SAVE FP2-3 -#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 -#--LET V=U*U, W=V*V, CALCULATE -#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY -#--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) - fmov.x %fp1,%fp0 - fmul.x %fp0,%fp0 # FP0 IS V - fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1 - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS W - - fmov.d LOGB5(%pc),%fp3 - fmov.d LOGB4(%pc),%fp2 - - fmul.x %fp1,%fp3 # W*B5 - fmul.x %fp1,%fp2 # W*B4 - - fadd.d LOGB3(%pc),%fp3 # B3+W*B5 - fadd.d LOGB2(%pc),%fp2 # B2+W*B4 - - fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED - - fmul.x %fp0,%fp2 # V*(B2+W*B4) - - fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5) - fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V - - fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED - fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED - - fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) - - fmov.l %d0,%fpcr - fadd.x SAVEU(%a6),%fp0 - bra t_inx2 - -#--REGISTERS SAVED FPCR.
LOG(-VE) IS INVALID -LOGNEG: - bra t_operr - - global slognd -slognd: -#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT - - mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0 - -#----normalize the input value by left shifting k bits (k to be determined -#----below), adjusting exponent and storing -k to ADJK -#----the value TWOTO100 is no longer needed. -#----Note that this code assumes the denormalized input is NON-ZERO. - - movm.l &0x3f00,-(%sp) # save some registers {d2-d7} - mov.l (%a0),%d3 # D3 is exponent of smallest norm. # - mov.l 4(%a0),%d4 - mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X) - clr.l %d2 # D2 used for holding K - - tst.l %d4 - bne.b Hi_not0 - -Hi_0: - mov.l %d5,%d4 - clr.l %d5 - mov.l &32,%d2 - clr.l %d6 - bfffo %d4{&0:&32},%d6 - lsl.l %d6,%d4 - add.l %d6,%d2 # (D3,D4,D5) is normalized - - mov.l %d3,X(%a6) - mov.l %d4,XFRAC(%a6) - mov.l %d5,XFRAC+4(%a6) - neg.l %d2 - mov.l %d2,ADJK(%a6) - fmov.x X(%a6),%fp0 - movm.l (%sp)+,&0xfc # restore registers {d2-d7} - lea X(%a6),%a0 - bra.w LOGBGN # begin regular log(X) - -Hi_not0: - clr.l %d6 - bfffo %d4{&0:&32},%d6 # find first 1 - mov.l %d6,%d2 # get k - lsl.l %d6,%d4 - mov.l %d5,%d7 # a copy of D5 - lsl.l %d6,%d5 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d4 # (D3,D4,D5) normalized - - mov.l %d3,X(%a6) - mov.l %d4,XFRAC(%a6) - mov.l %d5,XFRAC+4(%a6) - neg.l %d2 - mov.l %d2,ADJK(%a6) - fmov.x X(%a6),%fp0 - movm.l (%sp)+,&0xfc # restore registers {d2-d7} - lea X(%a6),%a0 - bra.w LOGBGN # begin regular log(X) - - global slognp1 -#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S -slognp1: - fmov.x (%a0),%fp0 # LOAD INPUT - fabs.x %fp0 # test magnitude - fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold - fbgt.w LP1REAL # if greater, continue - fmov.l %d0,%fpcr - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x (%a0),%fp0 # return signed argument - bra t_catch - -LP1REAL: - fmov.x (%a0),%fp0 # LOAD INPUT - mov.l &0x00000000,ADJK(%a6) - fmov.x %fp0,%fp1 # FP1 IS INPUT Z - fadd.s one(%pc),%fp0 # X := ROUND(1+Z) - fmov.x %fp0,X(%a6) - mov.w XFRAC(%a6),XDCARE(%a6) - mov.l X(%a6),%d1 - cmp.l %d1,&0 - ble.w LP1NEG0 # LOG OF ZERO OR -VE - cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]? - blt.w LOGMAIN - cmp.l %d1,&0x3fffc000 - bgt.w LOGMAIN -#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, -#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, -#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). - -LP1NEAR1: -#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) - cmp.l %d1,&0x3ffef07d - blt.w LP1CARE - cmp.l %d1,&0x3fff8841 - bgt.w LP1CARE - -LP1ONE16: -#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) -#--WHERE U = 2Z/(2+Z) = 2Z/(1+X). - fadd.x %fp1,%fp1 # FP1 IS 2Z - fadd.s one(%pc),%fp0 # FP0 IS 1+X -#--U = FP1/FP0 - bra.w LP1CONT2 - -LP1CARE: -#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE -#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST -#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], -#--THERE ARE ONLY TWO CASES. -#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z -#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z -#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF -#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. 
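(A minimal C restatement of the two cases above, assuming F and K have already been derived from the rounded 1+Z as in Step 2 of lognp1; the function name and double precision are illustrative, and the point is only the operation order that keeps Y-F free of rounding.)

    /* K is -1 when 1+Z < 1 (so Y = 2(1+Z)); otherwise K is 0 (Y = 1+Z). */
    static double y_minus_f(double z, double f, int k)
    {
        if (k == -1)
            return (2.0 - f) + 2.0 * z;    /* case 1: Y-F = (2-F) + 2Z */
        return (1.0 - f) + z;              /* case 2: Y-F = (1-F) + Z */
    }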
- - mov.l XFRAC(%a6),FFRAC(%a6) - and.l &0xFE000000,FFRAC(%a6) - or.l &0x01000000,FFRAC(%a6) # F OBTAINED - cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1 - bge.b KISZERO - -KISNEG1: - fmov.s TWO(%pc),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # 2-F - mov.l FFRAC(%a6),%d1 - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 # D0 CONTAINS DISPLACEMENT FOR 1/F - fadd.x %fp1,%fp1 # GET 2Z - fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3} - fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z - lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F - add.l %d1,%a0 - fmov.s negone(%pc),%fp1 # FP1 IS K = -1 - bra.w LP1CONT1 - -KISZERO: - fmov.s one(%pc),%fp0 - mov.l &0x3fff0000,F(%a6) - clr.l F+8(%a6) - fsub.x F(%a6),%fp0 # 1-F - mov.l FFRAC(%a6),%d1 - and.l &0x7E000000,%d1 - asr.l &8,%d1 - asr.l &8,%d1 - asr.l &4,%d1 - fadd.x %fp1,%fp0 # FP0 IS Y-F - fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3} - lea LOGTBL(%pc),%a0 - add.l %d1,%a0 # A0 IS ADDRESS OF 1/F - fmov.s zero(%pc),%fp1 # FP1 IS K = 0 - bra.w LP1CONT1 - -LP1NEG0: -#--FPCR SAVED. D0 IS X IN COMPACT FORM. - cmp.l %d1,&0 - blt.b LP1NEG -LP1ZERO: - fmov.s negone(%pc),%fp0 - - fmov.l %d0,%fpcr - bra t_dz - -LP1NEG: - fmov.s zero(%pc),%fp0 - - fmov.l %d0,%fpcr - bra t_operr - - global slognp1d -#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT -# Simply return the denorm -slognp1d: - bra t_extdnrm - -######################################################################### -# satanh(): computes the inverse hyperbolic tangent of a norm input # -# satanhd(): computes the inverse hyperbolic tangent of a denorm input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = arctanh(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 3 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# ATANH # -# 1. If |X| >= 1, go to 3. # -# # -# 2. (|X| < 1) Calculate atanh(X) by # -# sgn := sign(X) # -# y := |X| # -# z := 2y/(1-y) # -# atanh(X) := sgn * (1/2) * logp1(z) # -# Exit. # -# # -# 3. If |X| > 1, go to 5. # -# # -# 4. (|X| = 1) Generate infinity with an appropriate sign and # -# divide-by-zero by # -# sgn := sign(X) # -# atan(X) := sgn / (+0). # -# Exit. # -# # -# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # -# Exit. # -# # -######################################################################### - - global satanh -satanh: - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - cmp.l %d1,&0x3FFF8000 - bge.b ATANHBIG - -#--THIS IS THE USUAL CASE, |X| < 1 -#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). 
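(The whole satanh flow, including the two ATANHBIG outcomes handled just below, can be sketched in C as follows. The name atanh_sketch and double precision are assumptions; the divide by zero and the 0*INF product are written out deliberately so they raise divide-by-zero and invalid, as steps 4 and 5 of the ALGORITHM block require.)

    #include <math.h>

    static double atanh_sketch(double x)
    {
        double y = fabs(x);
        if (y < 1.0) {                          /* the usual case */
            double z = 2.0 * y / (1.0 - y);     /* Z = 2Y/(1-Y) */
            return copysign(0.5 * log1p(z), x); /* sgn * (1/2) * log1p(Z) */
        }
        if (y == 1.0)                           /* |X| = 1: sgn / (+0) */
            return copysign(1.0, x) / 0.0;
        return 0.0 * INFINITY;                  /* |X| > 1: invalid, NaN */
    }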
- - fabs.x (%a0),%fp0 # Y = |X| - fmov.x %fp0,%fp1 - fneg.x %fp1 # -Y - fadd.x %fp0,%fp0 # 2Y - fadd.s &0x3F800000,%fp1 # 1-Y - fdiv.x %fp1,%fp0 # 2Y/(1-Y) - mov.l (%a0),%d1 - and.l &0x80000000,%d1 - or.l &0x3F000000,%d1 # SIGN(X)*HALF - mov.l %d1,-(%sp) - - mov.l %d0,-(%sp) # save rnd prec,mode - clr.l %d0 # pass ext prec,RN - fmovm.x &0x01,-(%sp) # save Z on stack - lea (%sp),%a0 # pass ptr to Z - bsr slognp1 # LOG1P(Z) - add.l &0xc,%sp # clear Z from stack - - mov.l (%sp)+,%d0 # fetch old prec,mode - fmov.l %d0,%fpcr # load it - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.s (%sp)+,%fp0 - bra t_catch - -ATANHBIG: - fabs.x (%a0),%fp0 # |X| - fcmp.s %fp0,&0x3F800000 - fbgt t_operr - bra t_dz - - global satanhd -#--ATANH(X) = X FOR DENORMALIZED X -satanhd: - bra t_extdnrm - -######################################################################### -# slog10(): computes the base-10 logarithm of a normalized input # -# slog10d(): computes the base-10 logarithm of a denormalized input # -# slog2(): computes the base-2 logarithm of a normalized input # -# slog2d(): computes the base-2 logarithm of a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = log_10(X) or log_2(X) # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 1.7 ulps in 64 significant bit, # -# i.e. within 0.5003 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# slog10d: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # -# Notes: Even if X is denormalized, log(X) is always normalized. # -# # -# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L10. # -# # -# slog10: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # -# # -# Step 2. Compute log_10(X) = log(X) * (1/log(10)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L10. # -# # -# sLog2d: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default. # -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. Call slognd to obtain Y = log(X), the natural log of X. # -# Notes: Even if X is denormalized, log(X) is always normalized. # -# # -# Step 2. Compute log_2(X) = log(X) * (1/log(2)). # -# 2.1 Restore the user FPCR # -# 2.2 Return ans := Y * INV_L2. # -# # -# sLog2: # -# # -# Step 0. If X < 0, create a NaN and raise the invalid operation # -# flag. Otherwise, save FPCR in D1; set FPCR to default.
# -# Notes: Default means round-to-nearest mode, no floating-point # -# traps, and precision control = double extended. # -# # -# Step 1. If X is not an integer power of two, i.e., X != 2^k, # -# go to Step 3. # -# # -# Step 2. Return k. # -# 2.1 Get integer k, X = 2^k. # -# 2.2 Restore the user FPCR. # -# 2.3 Return ans := convert-to-double-extended(k). # -# # -# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. # -# # -# Step 4. Compute log_2(X) = log(X) * (1/log(2)). # -# 4.1 Restore the user FPCR # -# 4.2 Return ans := Y * INV_L2. # -# # -######################################################################### - -INV_L10: - long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 - -INV_L2: - long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 - - global slog10 -#--entry point for Log10(X), X is normalized -slog10: - fmov.b &0x1,%fp0 - fcmp.x %fp0,(%a0) # if operand == 1, - fbeq.l ld_pzero # return an EXACT zero - - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slogn # log(X), X normal. - fmov.l (%sp)+,%fpcr - fmul.x INV_L10(%pc),%fp0 - bra t_inx2 - - global slog10d -#--entry point for Log10(X), X is denormalized -slog10d: - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slognd # log(X), X denorm. - fmov.l (%sp)+,%fpcr - fmul.x INV_L10(%pc),%fp0 - bra t_minx2 - - global slog2 -#--entry point for Log2(X), X is normalized -slog2: - mov.l (%a0),%d1 - blt.w invalid - - mov.l 8(%a0),%d1 - bne.b continue # X is not 2^k - - mov.l 4(%a0),%d1 - and.l &0x7FFFFFFF,%d1 - bne.b continue - -#--X = 2^k. - mov.w (%a0),%d1 - and.l &0x00007FFF,%d1 - sub.l &0x3FFF,%d1 - beq.l ld_pzero - fmov.l %d0,%fpcr - fmov.l %d1,%fp0 - bra t_inx2 - -continue: - mov.l %d0,-(%sp) - clr.l %d0 - bsr slogn # log(X), X normal. - fmov.l (%sp)+,%fpcr - fmul.x INV_L2(%pc),%fp0 - bra t_inx2 - -invalid: - bra t_operr - - global slog2d -#--entry point for Log2(X), X is denormalized -slog2d: - mov.l (%a0),%d1 - blt.w invalid - mov.l %d0,-(%sp) - clr.l %d0 - bsr slognd # log(X), X denorm. - fmov.l (%sp)+,%fpcr - fmul.x INV_L2(%pc),%fp0 - bra t_minx2 - -######################################################################### -# stwotox(): computes 2**X for a normalized input # -# stwotoxd(): computes 2**X for a denormalized input # -# stentox(): computes 10**X for a normalized input # -# stentoxd(): computes 10**X for a denormalized input # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input # -# d0 = round precision,mode # -# # -# OUTPUT ************************************************************** # -# fp0 = 2**X or 10**X # -# # -# ACCURACY and MONOTONICITY ******************************************* # -# The returned result is within 2 ulps in 64 significant bit, # -# i.e. within 0.5001 ulp to 53 bits if the result is subsequently # -# rounded to double precision. The result is provably monotonic # -# in double precision. # -# # -# ALGORITHM *********************************************************** # -# # -# twotox # -# 1. If |X| > 16480, go to ExpBig. # -# # -# 2. If |X| < 2**(-70), go to ExpSm. # -# # -# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore # -# decompose N as # -# N = 64(M + M') + j, j = 0,1,2,...,63. # -# # -# 4. Overwrite r := r * log2. Then # -# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # -# Go to expr to compute that expression. # -# # -# tentox # -# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # -# # -# 2. If |X| < 2**(-70), go to ExpSm. # -# # -# 3. 
Set y := X*log_2(10)*64 (base 2 log of 10). Set # -# N := round-to-int(y). Decompose N as # -# N = 64(M + M') + j, j = 0,1,2,...,63. # -# # -# 4. Define r as # -# r := ((X - N*L1)-N*L2) * L10 # -# where L1, L2 are the leading and trailing parts of # -# log_10(2)/64 and L10 is the natural log of 10. Then # -# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # -# Go to expr to compute that expression. # -# # -# expr # -# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # -# # -# 2. Overwrite Fact1 and Fact2 by # -# Fact1 := 2**(M) * Fact1 # -# Fact2 := 2**(M) * Fact2 # -# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # -# # -# 3. Calculate P where 1 + P approximates exp(r): # -# P = r + r*r*(A1+r*(A2+...+r*A5)). # -# # -# 4. Let AdjFact := 2**(M'). Return # -# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # -# Exit. # -# # -# ExpBig # -# 1. Generate overflow by Huge * Huge if X > 0; otherwise, # -# generate underflow by Tiny * Tiny. # -# # -# ExpSm # -# 1. Return 1 + X. # -# # -######################################################################### - -L2TEN64: - long 0x406A934F,0x0979A371 # 64LOG10/LOG2 -L10TWO1: - long 0x3F734413,0x509F8000 # LOG2/64LOG10 - -L10TWO2: - long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 - -LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 - -LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 - -EXPA5: long 0x3F56C16D,0x6F7BD0B2 -EXPA4: long 0x3F811112,0x302C712C -EXPA3: long 0x3FA55555,0x55554CC1 -EXPA2: long 0x3FC55555,0x55554A54 -EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 - -TEXPTBL: - long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 - long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA - long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 - long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 - long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA - long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C - long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 - long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA - long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 - long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 - long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 - long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 - long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D - long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 - long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B - long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 - long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A - long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B - long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF - long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA - long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD - long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E - long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B - long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB - long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB - long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 - long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C - long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 - long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 - long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 - long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F - long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C - long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB - long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB - long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C - long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA - long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD - long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 - long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A - long 
0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 - long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB - long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 - long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C - long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 - long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 - long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE - long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 - long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 - long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A - long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 - long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 - long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 - long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 - long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE - long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 - long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F - long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A - long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A - long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC - long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F - long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A - long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 - long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B - long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 - - set INT,L_SCR1 - - set X,FP_SCR0 - set XDCARE,X+2 - set XFRAC,X+4 - - set ADJFACT,FP_SCR0 - - set FACT1,FP_SCR0 - set FACT1HI,FACT1+4 - set FACT1LOW,FACT1+8 - - set FACT2,FP_SCR1 - set FACT2HI,FACT2+4 - set FACT2LOW,FACT2+8 - - global stwotox -#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -stwotox: - fmovm.x (%a0),&0x80 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? - bge.b TWOOK1 - bra.w EXPBORS - -TWOOK1: - cmp.l %d1,&0x400D80C0 # |X| > 16480? - ble.b TWOMAIN - bra.w EXPBORS - -TWOMAIN: -#--USUAL CASE, 2^(-70) <= |X| <= 16480 - - fmov.x %fp0,%fp1 - fmul.s &0x42800000,%fp1 # 64 * X - fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) - mov.l %d2,-(%sp) - lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) - fmov.l INT(%a6),%fp1 # N --> FLOATING FMT - mov.l INT(%a6),%d1 - mov.l %d1,%d2 - and.l &0x3F,%d1 # D0 IS J - asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) - add.l %d1,%a1 # ADDRESS FOR 2^(J/64) - asr.l &6,%d2 # d2 IS L, N = 64L + J - mov.l %d2,%d1 - asr.l &1,%d1 # D0 IS M - sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J - add.l &0x3FFF,%d2 - -#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -#--ADJFACT = 2^(M'). -#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. 
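(A hedged C sketch of the TWOMAIN decomposition summarized above, with exp2() standing in for the tabulated FACT1/FACT2 split and ldexp() for the 2**(M) and 2**(M') scalings. The names, double precision, and the assumption that >> on a negative long is an arithmetic shift, as the asr instructions are, are all illustrative.)

    #include <math.h>

    static const double LN2 = 0.69314718055994530942;

    static double twotox_sketch(double x)        /* usual-case path only */
    {
        long n  = lrint(64.0 * x);               /* N = ROUND-TO-INT(64 X) */
        long j  = n & 0x3f;                      /* J, 0 <= J <= 63 */
        long l  = n >> 6;                        /* L, where N = 64L + J */
        long m  = l >> 1;                        /* M */
        long mp = l - m;                         /* M', N = 64(M+M') + J */
        double r = (x - (double)n / 64.0) * LN2; /* r := r * log2 */
        double f = exp2((double)j / 64.0);       /* 2^(J/64) = FACT1+FACT2 */
        double p = expm1(r);                     /* exp(r) - 1 polynomial */
        return ldexp(ldexp(f + f * p, (int)m), (int)mp);
    }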
- - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmul.s &0x3C800000,%fp1 # (1/64)*N - mov.l (%a1)+,FACT1(%a6) - mov.l (%a1)+,FACT1HI(%a6) - mov.l (%a1)+,FACT1LOW(%a6) - mov.w (%a1)+,FACT2(%a6) - - fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) - - mov.w (%a1)+,FACT2HI(%a6) - clr.w FACT2HI+2(%a6) - clr.l FACT2LOW(%a6) - add.w %d1,FACT1(%a6) - fmul.x LOG2(%pc),%fp0 # FP0 IS R - add.w %d1,FACT2(%a6) - - bra.w expr - -EXPBORS: -#--FPCR, D0 SAVED - cmp.l %d1,&0x3FFF8000 - bgt.b TEXPBIG - -#--|X| IS SMALL, RETURN 1 + X - - fmov.l %d0,%fpcr # restore users round prec,mode - fadd.s &0x3F800000,%fp0 # RETURN 1 + X - bra t_pinx2 - -TEXPBIG: -#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW -#--REGISTERS SAVE SO FAR ARE FPCR AND D0 - mov.l X(%a6),%d1 - cmp.l %d1,&0 - blt.b EXPNEG - - bra t_ovfl2 # t_ovfl expects positive value - -EXPNEG: - bra t_unfl2 # t_unfl expects positive value - - global stwotoxd -stwotoxd: -#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT - - fmov.l %d0,%fpcr # set user's rounding mode/precision - fmov.s &0x3F800000,%fp0 # RETURN 1 + X - mov.l (%a0),%d1 - or.l &0x00800001,%d1 - fadd.s %d1,%fp0 - bra t_pinx2 - - global stentox -#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S -stentox: - fmovm.x (%a0),&0x80 # LOAD INPUT - - mov.l (%a0),%d1 - mov.w 4(%a0),%d1 - fmov.x %fp0,X(%a6) - and.l &0x7FFFFFFF,%d1 - - cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? - bge.b TENOK1 - bra.w EXPBORS - -TENOK1: - cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? - ble.b TENMAIN - bra.w EXPBORS - -TENMAIN: -#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 - - fmov.x %fp0,%fp1 - fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 - fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) - mov.l %d2,-(%sp) - lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) - fmov.l INT(%a6),%fp1 # N --> FLOATING FMT - mov.l INT(%a6),%d1 - mov.l %d1,%d2 - and.l &0x3F,%d1 # D0 IS J - asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) - add.l %d1,%a1 # ADDRESS FOR 2^(J/64) - asr.l &6,%d2 # d2 IS L, N = 64L + J - mov.l %d2,%d1 - asr.l &1,%d1 # D0 IS M - sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J - add.l &0x3FFF,%d2 - -#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), -#--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. -#--ADJFACT = 2^(M'). -#--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2. - fmovm.x &0x0c,-(%sp) # save fp2/fp3 - - fmov.x %fp1,%fp2 - - fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD - mov.l (%a1)+,FACT1(%a6) - - fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL - - mov.l (%a1)+,FACT1HI(%a6) - mov.l (%a1)+,FACT1LOW(%a6) - fsub.x %fp1,%fp0 # X - N L_LEAD - mov.w (%a1)+,FACT2(%a6) - - fsub.x %fp2,%fp0 # X - N L_TRAIL - - mov.w (%a1)+,FACT2HI(%a6) - clr.w FACT2HI+2(%a6) - clr.l FACT2LOW(%a6) - - fmul.x LOG10(%pc),%fp0 # FP0 IS R - add.w %d1,FACT1(%a6) - add.w %d1,FACT2(%a6) - -expr: -#--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN. -#--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64). -#--FP0 IS R. 
THE FOLLOWING CODE COMPUTES -#-- 2**(M'+M) * 2**(J/64) * EXP(R) - - fmov.x %fp0,%fp1 - fmul.x %fp1,%fp1 # FP1 IS S = R*R - - fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 - fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 - - fmul.x %fp1,%fp2 # FP2 IS S*A5 - fmul.x %fp1,%fp3 # FP3 IS S*A4 - - fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 - fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 - - fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) - fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) - - fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) - fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) - - fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) - fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) - fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 - - fmovm.x (%sp)+,&0x30 # restore fp2/fp3 - -#--FINAL RECONSTRUCTION PROCESS -#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0) - - fmul.x FACT1(%a6),%fp0 - fadd.x FACT2(%a6),%fp0 - fadd.x FACT1(%a6),%fp0 - - fmov.l %d0,%fpcr # restore users round prec,mode - mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT - mov.l (%sp)+,%d2 - mov.l &0x80000000,ADJFACT+4(%a6) - clr.l ADJFACT+8(%a6) - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT - bra t_catch - - global stentoxd -stentoxd: -#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT - - fmov.l %d0,%fpcr # set user's rounding mode/precision - fmov.s &0x3F800000,%fp0 # RETURN 1 + X - mov.l (%a0),%d1 - or.l &0x00800001,%d1 - fadd.s %d1,%fp0 - bra t_pinx2 - -######################################################################### -# sscale(): computes the destination operand scaled by the source # -# operand. If the absolute value of the source operand is # -# >= 2^14, an overflow or underflow is returned. # -# # -# INPUT *************************************************************** # -# a0 = pointer to double-extended source operand X # -# a1 = pointer to double-extended destination operand Y # -# # -# OUTPUT ************************************************************** # -# fp0 = scale(X,Y) # -# # -######################################################################### - -set SIGN, L_SCR1 - - global sscale -sscale: - mov.l %d0,-(%sp) # store off ctrl bits for now - - mov.w DST_EX(%a1),%d1 # get dst exponent - smi.b SIGN(%a6) # use SIGN to hold dst sign - andi.l &0x00007fff,%d1 # strip sign from dst exp - - mov.w SRC_EX(%a0),%d0 # check src bounds - andi.w &0x7fff,%d0 # clr src sign bit - cmpi.w %d0,&0x3fff # is src ~ ZERO? - blt.w src_small # yes - cmpi.w %d0,&0x400c # no; is src too big? - bgt.w src_out # yes - -# -# Source is within 2^14 range. -# -src_ok: - fintrz.x SRC(%a0),%fp0 # calc int of src - fmov.l %fp0,%d0 # int src to d0 -# don't want any accrued bits from the fintrz showing up later since -# we may need to read the fpsr for the last fp op in t_catch2(). - fmov.l &0x0,%fpsr - - tst.b DST_HI(%a1) # is dst denormalized? - bmi.b sok_norm - -# the dst is a DENORM. normalize the DENORM and add the adjustment to -# the src value. then, jump to the norm part of the routine. -sok_dnrm: - mov.l %d0,-(%sp) # save src for now - - mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy - mov.l DST_HI(%a1),FP_SCR0_HI(%a6) - mov.l DST_LO(%a1),FP_SCR0_LO(%a6) - - lea FP_SCR0(%a6),%a0 # pass ptr to DENORM - bsr.l norm # normalize the DENORM - neg.l %d0 - add.l (%sp)+,%d0 # add adjustment to src - - fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM - - cmpi.w %d0,&-0x3fff # is the shft amt really low? - bge.b sok_norm2 # thank goodness no - -# the multiply factor that we're trying to create should be a denorm -# for the multiply to work.
therefore, we're going to actually do a -# multiply with a denorm which will cause an unimplemented data type -# exception to be put into the machine which will be caught and corrected -# later. we don't do this with the DENORMs above because this method -# is slower. but, don't fret, I don't see it being used much either. - fmov.l (%sp)+,%fpcr # restore user fpcr - mov.l &0x80000000,%d1 # load normalized mantissa - subi.l &-0x3fff,%d0 # how many should we shift? - neg.l %d0 # make it positive - cmpi.b %d0,&0x20 # is it > 32? - bge.b sok_dnrm_32 # yes - lsr.l %d0,%d1 # no; bit stays in upper lw - clr.l -(%sp) # insert zero low mantissa - mov.l %d1,-(%sp) # insert new high mantissa - clr.l -(%sp) # make zero exponent - bra.b sok_norm_cont -sok_dnrm_32: - subi.b &0x20,%d0 # get shift count - lsr.l %d0,%d1 # make low mantissa longword - mov.l %d1,-(%sp) # insert new low mantissa - clr.l -(%sp) # insert zero high mantissa - clr.l -(%sp) # make zero exponent - bra.b sok_norm_cont - -# the src will force the dst to a DENORM value or worse. so, let's -# create an fp multiply that will create the result. -sok_norm: - fmovm.x DST(%a1),&0x80 # load fp0 with normalized src -sok_norm2: - fmov.l (%sp)+,%fpcr # restore user fpcr - - addi.w &0x3fff,%d0 # turn src amt into exp value - swap %d0 # put exponent in high word - clr.l -(%sp) # insert new exponent - mov.l &0x80000000,-(%sp) # insert new high mantissa - mov.l %d0,-(%sp) # insert new lo mantissa - -sok_norm_cont: - fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2 - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x (%sp)+,%fp0 # do the multiply - bra t_catch2 # catch any exceptions - -# -# Source is outside of 2^14 range. Test the sign and branch -# to the appropriate exception handler. -# -src_out: - mov.l (%sp)+,%d0 # restore ctrl bits - exg %a0,%a1 # swap src,dst ptrs - tst.b SRC_EX(%a1) # is src negative? - bmi t_unfl # yes; underflow - bra t_ovfl_sc # no; overflow - -# -# The source input is below 1, so we check for denormalized numbers -# and set unfl. -# -src_small: - tst.b DST_HI(%a1) # is dst denormalized? - bpl.b ssmall_done # yes - - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr # no; load control bits - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x DST(%a1),%fp0 # simply return dest - bra t_catch2 -ssmall_done: - mov.l (%sp)+,%d0 # load control bits into d1 - mov.l %a1,%a0 # pass ptr to dst - bra t_resdnrm - -######################################################################### -# smod(): computes the fp MOD of the input values X,Y. # -# srem(): computes the fp (IEEE) REM of the input values X,Y. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input X # -# a1 = pointer to extended precision input Y # -# d0 = round precision,mode # -# # -# The input operands X and Y can be either normalized or # -# denormalized. # -# # -# OUTPUT ************************************************************** # -# fp0 = FREM(X,Y) or FMOD(X,Y) # -# # -# ALGORITHM *********************************************************** # -# # -# Step 1. Save and strip signs of X and Y: signX := sign(X), # -# signY := sign(Y), X := |X|, Y := |Y|, # -# signQ := signX EOR signY. Record whether MOD or REM # -# is requested. # -# # -# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. # -# If (L < 0) then # -# R := X, go to Step 4. # -# else # -# R := 2^(-L)X, j := L. # -# endif # -# # -# Step 3. Perform MOD(X,Y) # -# 3.1 If R = Y, go to Step 9. 
# -# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # -# 3.3 If j = 0, go to Step 4. # -# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # -# Step 3.1. # -# # -# Step 4. At this point, R = X - QY = MOD(X,Y). Set # -# Last_Subtract := false (used in Step 7 below). If # -# MOD is requested, go to Step 6. # -# # -# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # -# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # -# Step 6. # -# 5.2 If R > Y/2, then { set Last_Subtract := true, # -# Q := Q + 1, Y := signY*Y }. Go to Step 6. # -# 5.3 This is the tricky case of R = Y/2. If Q is odd, # -# then { Q := Q + 1, signX := -signX }. # -# # -# Step 6. R := signX*R. # -# # -# Step 7. If Last_Subtract = true, R := R - Y. # -# # -# Step 8. Return signQ, last 7 bits of Q, and R as required. # -# # -# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # -# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # -# R := 0. Return signQ, last 7 bits of Q, and R. # -# # -######################################################################### - - set Mod_Flag,L_SCR3 - set Sc_Flag,L_SCR3+1 - - set SignY,L_SCR2 - set SignX,L_SCR2+2 - set SignQ,L_SCR3+2 - - set Y,FP_SCR0 - set Y_Hi,Y+4 - set Y_Lo,Y+8 - - set R,FP_SCR1 - set R_Hi,R+4 - set R_Lo,R+8 - -Scale: - long 0x00010000,0x80000000,0x00000000,0x00000000 - - global smod -smod: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) # save ctrl bits - clr.b Mod_Flag(%a6) - bra.b Mod_Rem - - global srem -srem: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) # save ctrl bits - mov.b &0x1,Mod_Flag(%a6) - -Mod_Rem: -#..Save sign of X and Y - movm.l &0x3f00,-(%sp) # save data registers - mov.w SRC_EX(%a0),%d3 - mov.w %d3,SignY(%a6) - and.l &0x00007FFF,%d3 # Y := |Y| - -# - mov.l SRC_HI(%a0),%d4 - mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y| - - tst.l %d3 - bne.b Y_Normal - - mov.l &0x00003FFE,%d3 # $3FFD + 1 - tst.l %d4 - bne.b HiY_not0 - -HiY_0: - mov.l %d5,%d4 - clr.l %d5 - sub.l &32,%d3 - clr.l %d6 - bfffo %d4{&0:&32},%d6 - lsl.l %d6,%d4 - sub.l %d6,%d3 # (D3,D4,D5) is normalized -# ...with bias $7FFD - bra.b Chk_X - -HiY_not0: - clr.l %d6 - bfffo %d4{&0:&32},%d6 - sub.l %d6,%d3 - lsl.l %d6,%d4 - mov.l %d5,%d7 # a copy of D5 - lsl.l %d6,%d5 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d4 # (D3,D4,D5) normalized -# ...with bias $7FFD - bra.b Chk_X - -Y_Normal: - add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized -# ...with bias $7FFD - -Chk_X: - mov.w DST_EX(%a1),%d0 - mov.w %d0,SignX(%a6) - mov.w SignY(%a6),%d1 - eor.l %d0,%d1 - and.l &0x00008000,%d1 - mov.w %d1,SignQ(%a6) # sign(Q) obtained - and.l &0x00007FFF,%d0 - mov.l DST_HI(%a1),%d1 - mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X| - tst.l %d0 - bne.b X_Normal - mov.l &0x00003FFE,%d0 - tst.l %d1 - bne.b HiX_not0 - -HiX_0: - mov.l %d2,%d1 - clr.l %d2 - sub.l &32,%d0 - clr.l %d6 - bfffo %d1{&0:&32},%d6 - lsl.l %d6,%d1 - sub.l %d6,%d0 # (D0,D1,D2) is normalized -# ...with bias $7FFD - bra.b Init - -HiX_not0: - clr.l %d6 - bfffo %d1{&0:&32},%d6 - sub.l %d6,%d0 - lsl.l %d6,%d1 - mov.l %d2,%d7 # a copy of D2 - lsl.l %d6,%d2 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d1 # (D0,D1,D2) normalized -# ...with bias $7FFD - bra.b Init - -X_Normal: - add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized -# ...with bias $7FFD - -Init: -# - mov.l %d3,L_SCR1(%a6) # save biased exp(Y) - mov.l %d0,-(%sp) # save biased exp(X) - sub.l %d3,%d0 # L := expo(X)-expo(Y) - - clr.l %d6 # D6 := carry <- 0 - clr.l %d3 # D3 is Q - mov.l &0,%a1 # A1 is k; j+k=L, Q=0 - -#..(Carry,D1,D2) is R - tst.l %d0 - bge.b Mod_Loop_pre - -#..expo(X) < expo(Y). 
Thus X = mod(X,Y) -# - mov.l (%sp)+,%d0 # restore d0 - bra.w Get_Mod - -Mod_Loop_pre: - addq.l &0x4,%sp # erase exp(X) -#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L -Mod_Loop: - tst.l %d6 # test carry bit - bgt.b R_GT_Y - -#..At this point carry = 0, R = (D1,D2), Y = (D4,D5) - cmp.l %d1,%d4 # compare hi(R) and hi(Y) - bne.b R_NE_Y - cmp.l %d2,%d5 # compare lo(R) and lo(Y) - bne.b R_NE_Y - -#..At this point, R = Y - bra.w Rem_is_0 - -R_NE_Y: -#..use the borrow of the previous compare - bcs.b R_LT_Y # borrow is set iff R < Y - -R_GT_Y: -#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 -#..and Y < (D1,D2) < 2Y. Either way, perform R - Y - sub.l %d5,%d2 # lo(R) - lo(Y) - subx.l %d4,%d1 # hi(R) - hi(Y) - clr.l %d6 # clear carry - addq.l &1,%d3 # Q := Q + 1 - -R_LT_Y: -#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. - tst.l %d0 # see if j = 0. - beq.b PostLoop - - add.l %d3,%d3 # Q := 2Q - add.l %d2,%d2 # lo(R) = 2lo(R) - roxl.l &1,%d1 # hi(R) = 2hi(R) + carry - scs %d6 # set Carry if 2(R) overflows - addq.l &1,%a1 # k := k+1 - subq.l &1,%d0 # j := j - 1 -#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. - - bra.b Mod_Loop - -PostLoop: -#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. - -#..normalize R. - mov.l L_SCR1(%a6),%d0 # new biased expo of R - tst.l %d1 - bne.b HiR_not0 - -HiR_0: - mov.l %d2,%d1 - clr.l %d2 - sub.l &32,%d0 - clr.l %d6 - bfffo %d1{&0:&32},%d6 - lsl.l %d6,%d1 - sub.l %d6,%d0 # (D0,D1,D2) is normalized -# ...with bias $7FFD - bra.b Get_Mod - -HiR_not0: - clr.l %d6 - bfffo %d1{&0:&32},%d6 - bmi.b Get_Mod # already normalized - sub.l %d6,%d0 - lsl.l %d6,%d1 - mov.l %d2,%d7 # a copy of D2 - lsl.l %d6,%d2 - neg.l %d6 - add.l &32,%d6 - lsr.l %d6,%d7 - or.l %d7,%d1 # (D0,D1,D2) normalized - -# -Get_Mod: - cmp.l %d0,&0x000041FE - bge.b No_Scale -Do_Scale: - mov.w %d0,R(%a6) - mov.l %d1,R_Hi(%a6) - mov.l %d2,R_Lo(%a6) - mov.l L_SCR1(%a6),%d6 - mov.w %d6,Y(%a6) - mov.l %d4,Y_Hi(%a6) - mov.l %d5,Y_Lo(%a6) - fmov.x R(%a6),%fp0 # no exception - mov.b &1,Sc_Flag(%a6) - bra.b ModOrRem -No_Scale: - mov.l %d1,R_Hi(%a6) - mov.l %d2,R_Lo(%a6) - sub.l &0x3FFE,%d0 - mov.w %d0,R(%a6) - mov.l L_SCR1(%a6),%d6 - sub.l &0x3FFE,%d6 - mov.l %d6,L_SCR1(%a6) - fmov.x R(%a6),%fp0 - mov.w %d6,Y(%a6) - mov.l %d4,Y_Hi(%a6) - mov.l %d5,Y_Lo(%a6) - clr.b Sc_Flag(%a6) - -# -ModOrRem: - tst.b Mod_Flag(%a6) - beq.b Fix_Sign - - mov.l L_SCR1(%a6),%d6 # new biased expo(Y) - subq.l &1,%d6 # biased expo(Y/2) - cmp.l %d0,%d6 - blt.b Fix_Sign - bgt.b Last_Sub - - cmp.l %d1,%d4 - bne.b Not_EQ - cmp.l %d2,%d5 - bne.b Not_EQ - bra.w Tie_Case - -Not_EQ: - bcs.b Fix_Sign - -Last_Sub: -# - fsub.x Y(%a6),%fp0 # no exceptions - addq.l &1,%d3 # Q := Q + 1 - -# -Fix_Sign: -#..Get sign of X - mov.w SignX(%a6),%d6 - bge.b Get_Q - fneg.x %fp0 - -#..Get Q -# -Get_Q: - clr.l %d6 - mov.w SignQ(%a6),%d6 # D6 is sign(Q) - mov.l &8,%d7 - lsr.l %d7,%d6 - and.l &0x0000007F,%d3 # 7 bits of Q - or.l %d6,%d3 # sign and bits of Q -# swap %d3 -# fmov.l %fpsr,%d6 -# and.l &0xFF00FFFF,%d6 -# or.l %d3,%d6 -# fmov.l %d6,%fpsr # put Q in fpsr - mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr - -# -Restore: - movm.l (%sp)+,&0xfc # {%d2-%d7} - mov.l (%sp)+,%d0 - fmov.l %d0,%fpcr - tst.b Sc_Flag(%a6) - beq.b Finish - mov.b &FMUL_OP,%d1 # last inst is MUL - fmul.x Scale(%pc),%fp0 # may cause underflow - bra t_catch2 -# the '040 package did this apparently to see if the dst operand for the -# preceding fmul was a denorm. 
but, it better not have been since the -# algorithm just got done playing with fp0 and expected no exceptions -# as a result. trust me... -# bra t_avoid_unsupp # check for denorm as a -# ;result of the scaling - -Finish: - mov.b &FMOV_OP,%d1 # last inst is MOVE - fmov.x %fp0,%fp0 # capture exceptions & round - bra t_catch2 - -Rem_is_0: -#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) - addq.l &1,%d3 - cmp.l %d0,&8 # D0 is j - bge.b Q_Big - - lsl.l %d0,%d3 - bra.b Set_R_0 - -Q_Big: - clr.l %d3 - -Set_R_0: - fmov.s &0x00000000,%fp0 - clr.b Sc_Flag(%a6) - bra.w Fix_Sign - -Tie_Case: -#..Check parity of Q - mov.l %d3,%d6 - and.l &0x00000001,%d6 - tst.l %d6 - beq.w Fix_Sign # Q is even - -#..Q is odd, Q := Q + 1, signX := -signX - addq.l &1,%d3 - mov.w SignX(%a6),%d6 - eor.l &0x00008000,%d6 - mov.w %d6,SignX(%a6) - bra.w Fix_Sign - -######################################################################### -# XDEF **************************************************************** # -# tag(): return the optype of the input ext fp number # -# # -# This routine is used by the 060FPLSP. # -# # -# XREF **************************************************************** # -# None # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision operand # -# # -# OUTPUT ************************************************************** # -# d0 = value of type tag # -# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # -# # -# ALGORITHM *********************************************************** # -# Simply test the exponent, j-bit, and mantissa values to # -# determine the type of operand. # -# If it's an unnormalized zero, alter the operand and force it # -# to be a normal zero. # -# # -######################################################################### - - global tag -tag: - mov.w FTEMP_EX(%a0), %d0 # extract exponent - andi.w &0x7fff, %d0 # strip off sign - cmpi.w %d0, &0x7fff # is (EXP == MAX)? - beq.b inf_or_nan_x -not_inf_or_nan_x: - btst &0x7,FTEMP_HI(%a0) - beq.b not_norm_x -is_norm_x: - mov.b &NORM, %d0 - rts -not_norm_x: - tst.w %d0 # is exponent = 0? - bne.b is_unnorm_x -not_unnorm_x: - tst.l FTEMP_HI(%a0) - bne.b is_denorm_x - tst.l FTEMP_LO(%a0) - bne.b is_denorm_x -is_zero_x: - mov.b &ZERO, %d0 - rts -is_denorm_x: - mov.b &DENORM, %d0 - rts -is_unnorm_x: - bsr.l unnorm_fix # convert to norm,denorm,or zero - rts -is_unnorm_reg_x: - mov.b &UNNORM, %d0 - rts -inf_or_nan_x: - tst.l FTEMP_LO(%a0) - bne.b is_nan_x - mov.l FTEMP_HI(%a0), %d0 - and.l &0x7fffffff, %d0 # msb is a don't care! - bne.b is_nan_x -is_inf_x: - mov.b &INF, %d0 - rts -is_nan_x: - mov.b &QNAN, %d0 - rts - -############################################################# - -qnan: long 0x7fff0000, 0xffffffff, 0xffffffff - -######################################################################### -# XDEF **************************************************************** # -# t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. # -# t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand. # -# # -# OUTPUT ************************************************************** # -# fp0 = default DZ result.
# -# # -# ALGORITHM *********************************************************** # -# Transcendental emulation for the 060FPLSP has detected that # -# a DZ exception should occur for the instruction. If DZ is disabled, # -# return the default result. # -# If DZ is enabled, the dst operand should be returned unscathed # -# in fp0 while fp1 is used to create a DZ exception so that the # -# operating system can log that such an event occurred. # -# # -######################################################################### - - global t_dz -t_dz: - tst.b SRC_EX(%a0) # check sign for neg or pos - bpl.b dz_pinf # branch if pos sign - - global t_dz2 -t_dz2: - ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ - - btst &dz_bit,FPCR_ENABLE(%a6) - bne.b dz_minf_ena - -# dz is disabled. return a -INF. - fmov.s &0xff800000,%fp0 # return -INF - rts - -# dz is enabled. create a dz exception so the user can record it -# but use fp1 instead. return the dst operand unscathed in fp0. -dz_minf_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmov.s &0xbf800000,%fp1 # load -1 - fdiv.s &0x00000000,%fp1 # -1 / 0 - rts - -dz_pinf: - ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ - - btst &dz_bit,FPCR_ENABLE(%a6) - bne.b dz_pinf_ena - -# dz is disabled. return a +INF. - fmov.s &0x7f800000,%fp0 # return +INF - rts - -# dz is enabled. create a dz exception so the user can record it -# but use fp1 instead. return the dst operand unscathed in fp0. -dz_pinf_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmov.s &0x3f800000,%fp1 # load +1 - fdiv.s &0x00000000,%fp1 # +1 / 0 - rts - -######################################################################### -# XDEF **************************************************************** # -# t_operr(): Handle 060FPLSP OPERR exception during emulation. # -# # -# This routine is used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp1 = source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# fp1 = unchanged # -# # -# ALGORITHM *********************************************************** # -# An operand error should occur as the result of transcendental # -# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN # -# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 # -# and the source operand in fp1. Use fp2 to create an OPERR exception # -# so that the operating system can log the event. # -# # -######################################################################### - - global t_operr -t_operr: - ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP - - btst &operr_bit,FPCR_ENABLE(%a6) - bne.b operr_ena - -# operr is disabled. return a QNAN in fp0 - fmovm.x qnan(%pc),&0x80 # return QNAN - rts - -# operr is enabled. create an operr exception so the user can record it -# but use fp2 instead. return the dst operand unscathed in fp0. 
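(The same trick of manufacturing an invalid-operation exception with +INF x 0, so the event is visible to the operating system, can be sketched in portable C99 with <fenv.h>; the volatile qualifiers are an assumption to keep the compiler from folding the product away.)

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        feclearexcept(FE_ALL_EXCEPT);
        volatile float inf = INFINITY;
        volatile float r = inf * 0.0f;     /* +INF x 0, like fp2 above */
        if (fetestexcept(FE_INVALID))
            printf("invalid-operation flag raised, r = %f\n", (double)r);
        return 0;
    }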
-operr_ena: - fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x &0x04,-(%sp) # save fp2 - fmov.s &0x7f800000,%fp2 # load +INF - fmul.s &0x00000000,%fp2 # +INF x 0 - fmovm.x (%sp)+,&0x20 # restore fp2 - rts - -pls_huge: - long 0x7ffe0000,0xffffffff,0xffffffff -mns_huge: - long 0xfffe0000,0xffffffff,0xffffffff -pls_tiny: - long 0x00000000,0x80000000,0x00000000 -mns_tiny: - long 0x80000000,0x80000000,0x00000000 - -######################################################################### -# XDEF **************************************************************** # -# t_unfl(): Handle 060FPLSP underflow exception during emulation. # -# t_unfl2(): Handle 060FPLSP underflow exception during # -# emulation. Result always positive. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default underflow result # -# # -# ALGORITHM *********************************************************** # -# An underflow should occur as the result of transcendental # -# emulation in the 060FPLSP. Create an underflow by using "fmul" # -# and two very small numbers of appropriate sign so the operating # -# system can log the event. # -# # -######################################################################### - - global t_unfl -t_unfl: - tst.b SRC_EX(%a0) - bpl.b unf_pos - - global t_unfl2 -t_unfl2: - ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX - - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x mns_tiny(%pc),&0x80 - fmul.x pls_tiny(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts -unf_pos: - ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX - - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_tiny(%pc),&0x80 - fmul.x %fp0,%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - -######################################################################### -# XDEF **************************************************************** # -# t_ovfl(): Handle 060FPLSP overflow exception during emulation. # -# (monadic) # -# t_ovfl2(): Handle 060FPLSP overflow exception during # -# emulation. Result always positive. (dyadic) # -# t_ovfl_sc(): Handle 060FPLSP overflow exception during # -# emulation for "fscale". # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision source operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default overflow result # -# # -# ALGORITHM *********************************************************** # -# An overflow should occur as the result of transcendental # -# emulation in the 060FPLSP. Create an overflow by using "fmul" # -# and two very large numbers of appropriate sign so the operating # -# system can log the event. # -# For t_ovfl_sc() we take special care not to lose the INEX2 bit.
# -# # -######################################################################### - - global t_ovfl_sc -t_ovfl_sc: - ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX - - mov.b %d0,%d1 # fetch rnd prec,mode - andi.b &0xc0,%d1 # extract prec - beq.w ovfl_work - -# dst op is a DENORM. we have to normalize the mantissa to see if the -# result would be inexact for the given precision. make a copy of the -# dst so we don't screw up the version passed to us. - mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) - mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) - mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) - lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0 - movm.l &0xc080,-(%sp) # save d0-d1/a0 - bsr.l norm # normalize mantissa - movm.l (%sp)+,&0x0103 # restore d0-d1/a0 - - cmpi.b %d1,&0x40 # is precision sgl? - bne.b ovfl_sc_dbl # no; dbl -ovfl_sc_sgl: - tst.l LOCAL_LO(%a0) # is lo lw of sgl set? - bne.b ovfl_sc_inx # yes - tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set? - bne.b ovfl_sc_inx # yes - bra.w ovfl_work # don't set INEX2 -ovfl_sc_dbl: - mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of - andi.l &0x7ff,%d1 # dbl mantissa set? - beq.w ovfl_work # no; don't set INEX2 -ovfl_sc_inx: - ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2 - bra.b ovfl_work # continue - - global t_ovfl -t_ovfl: - ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX -ovfl_work: - tst.b SRC_EX(%a0) - bpl.b ovfl_p -ovfl_m: - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x mns_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - ori.b &neg_mask,%d0 - mov.b %d0,FPSR_CC(%a6) - rts -ovfl_p: - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - - global t_ovfl2 -t_ovfl2: - ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX - fmov.l USER_FPCR(%a6),%fpcr - fmovm.x pls_huge(%pc),&0x80 - fmul.x pls_huge(%pc),%fp0 - - fmov.l %fpsr,%d0 - rol.l &0x8,%d0 - mov.b %d0,FPSR_CC(%a6) - rts - -######################################################################### -# XDEF **************************************************************** # -# t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # -# emulation. # -# t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # -# emulation. # -# # -# These routines are used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp0 = default underflow or overflow result # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# If an overflow or underflow occurred during the last # -# instruction of transcendental 060FPLSP emulation, then it has already # -# occurred and has been logged. Now we need to see if an inexact # -# exception should occur. # -# # -######################################################################### - - global t_catch2 -t_catch2: - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - bra.b inx2_work - - global t_catch -t_catch: - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - -######################################################################### -# XDEF **************************************************************** # -# t_inx2(): Handle inexact 060FPLSP exception during emulation. # -# t_pinx2(): Handle inexact 060FPLSP exception for "+" results. 
# -# t_minx2(): Handle inexact 060FPLSP exception for "-" results. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# fp0 = default result # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# The last instruction of transcendental emulation for the # -# 060FPLSP should be inexact. So, if inexact is enabled, then we create # -# the event here by adding a large and very small number together # -# so that the operating system can log the event. # -# Must check, too, if the result was zero, in which case we just # -# set the FPSR bits and return. # -# # -######################################################################### - - global t_inx2 -t_inx2: - fblt.w t_minx2 - fbeq.w inx2_zero - - global t_pinx2 -t_pinx2: - ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX - bra.b inx2_work - - global t_minx2 -t_minx2: - ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) - -inx2_work: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - bne.b inx2_work_ena # yes - rts -inx2_work_ena: - fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions - fmov.s &0x3f800000,%fp1 # load +1 - fadd.x pls_tiny(%pc),%fp1 # cause exception - rts - -inx2_zero: - mov.b &z_bmask,FPSR_CC(%a6) - ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX - rts - -######################################################################### -# XDEF **************************************************************** # -# t_extdnrm(): Handle DENORM inputs in 060FPLSP. # -# t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". # -# # -# This routine is used by the 060FPLSP package. # -# # -# XREF **************************************************************** # -# None. # -# # -# INPUT *************************************************************** # -# a0 = pointer to extended precision input operand # -# # -# OUTPUT ************************************************************** # -# fp0 = default result # -# # -# ALGORITHM *********************************************************** # -# For all functions that have a denormalized input and that # -# f(x)=x, this is the entry point. # -# DENORM value is moved using "fmove" which triggers an exception # -# if enabled so the operating system can log the event. # -# # -######################################################################### - - global t_extdnrm -t_extdnrm: - fmov.l USER_FPCR(%a6),%fpcr - fmov.x SRC_EX(%a0),%fp0 - fmov.l %fpsr,%d0 - ori.l &unfinx_mask,%d0 - or.l %d0,USER_FPSR(%a6) - rts - - global t_resdnrm -t_resdnrm: - fmov.l USER_FPCR(%a6),%fpcr - fmov.x SRC_EX(%a0),%fp0 - fmov.l %fpsr,%d0 - or.l %d0,USER_FPSR(%a6) - rts - -########################################## - -# -# sto_cos: -# This is used by fsincos library emulation. The correct -# values are already in fp0 and fp1 so we do nothing here. 
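The t_inx2/t_pinx2/t_minx2 mechanism above is the mirror image of t_dz/t_operr: when inexact is enabled, the package adds +1 to pls_tiny so the rounded fadd genuinely sets INEX2 in hardware. The same effect in C; a sketch under the assumption of a hosted C99 environment with <fenv.h>, not code taken from the package:

    #include <fenv.h>
    #include <stdio.h>

    #pragma STDC FENV_ACCESS ON

    int main(void)
    {
        volatile double one = 1.0, tiny = 0x1p-60; /* stand-in for pls_tiny */
        volatile double r;

        feclearexcept(FE_ALL_EXCEPT);
        r = one + tiny;          /* exact sum needs 61 bits, so it rounds */
        if (fetestexcept(FE_INEXACT))
            printf("inexact raised, r = %.17g\n", r);
        return 0;
    }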
-# - global sto_cos -sto_cos: - rts - -########################################## - -# -# dst_qnan --- force result when destination is a NaN -# - global dst_qnan -dst_qnan: - fmov.x DST(%a1),%fp0 - tst.b DST_EX(%a1) - bmi.b dst_qnan_m -dst_qnan_p: - mov.b &nan_bmask,FPSR_CC(%a6) - rts -dst_qnan_m: - mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) - rts - -# -# src_qnan --- force result when source is a NaN -# - global src_qnan -src_qnan: - fmov.x SRC(%a0),%fp0 - tst.b SRC_EX(%a0) - bmi.b src_qnan_m -src_qnan_p: - mov.b &nan_bmask,FPSR_CC(%a6) - rts -src_qnan_m: - mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) - rts - -########################################## - -# -# Native instruction support -# -# Some systems may need entry points even for 68060 native -# instructions. These routines are provided for -# convenience. -# - global _fadds_ -_fadds_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src - rts - - global _faddd_ -_faddd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src - rts - - global _faddx_ -_faddx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fadd.x 0x10(%sp),%fp0 # fadd w/ ext src - rts - - global _fsubs_ -_fsubs_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src - rts - - global _fsubd_ -_fsubd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src - rts - - global _fsubx_ -_fsubx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fsub.x 0x10(%sp),%fp0 # fsub w/ ext src - rts - - global _fmuls_ -_fmuls_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src - rts - - global _fmuld_ -_fmuld_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src - rts - - global _fmulx_ -_fmulx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fmul.x 0x10(%sp),%fp0 # fmul w/ ext src - rts - - global _fdivs_ -_fdivs_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.s 0x8(%sp),%fp0 # load sgl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src - rts - - global _fdivd_ -_fdivd_: - fmov.l %fpcr,-(%sp) # save fpcr - fmov.l &0x00000000,%fpcr # clear fpcr for load - fmov.d 0x8(%sp),%fp0 # load dbl dst - fmov.l (%sp)+,%fpcr # restore fpcr - fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src - rts - - global _fdivx_ -_fdivx_: - fmovm.x 0x4(%sp),&0x80 # load ext dst - fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src - rts - - global _fabss_ -_fabss_: - fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src - rts - - global _fabsd_ -_fabsd_: - fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src - rts - - global _fabsx_ -_fabsx_: - fabs.x 0x4(%sp),%fp0 # fabs w/ ext src - rts - - global _fnegs_ -_fnegs_: - fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src - rts - - global _fnegd_ -_fnegd_: - fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src - rts - - global 
_fnegx_
-_fnegx_:
- fneg.x 0x4(%sp),%fp0 # fneg w/ ext src
- rts
-
- global _fsqrts_
-_fsqrts_:
- fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src
- rts
-
- global _fsqrtd_
-_fsqrtd_:
- fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src
- rts
-
- global _fsqrtx_
-_fsqrtx_:
- fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src
- rts
-
- global _fints_
-_fints_:
- fint.s 0x4(%sp),%fp0 # fint w/ sgl src
- rts
-
- global _fintd_
-_fintd_:
- fint.d 0x4(%sp),%fp0 # fint w/ dbl src
- rts
-
- global _fintx_
-_fintx_:
- fint.x 0x4(%sp),%fp0 # fint w/ ext src
- rts
-
- global _fintrzs_
-_fintrzs_:
- fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src
- rts
-
- global _fintrzd_
-_fintrzd_:
- fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src
- rts
-
- global _fintrzx_
-_fintrzx_:
- fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src
- rts
-
-########################################################################
-
-#########################################################################
-# src_zero(): Return signed zero according to sign of src operand. #
-#########################################################################
- global src_zero
-src_zero:
- tst.b SRC_EX(%a0) # get sign of src operand
- bmi.b ld_mzero # if neg, load neg zero
-
-#
-# ld_pzero(): return a positive zero.
-#
- global ld_pzero
-ld_pzero:
- fmov.s &0x00000000,%fp0 # load +0
- mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
- rts
-
-# ld_mzero(): return a negative zero.
- global ld_mzero
-ld_mzero:
- fmov.s &0x80000000,%fp0 # load -0
- mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
- rts
-
-#########################################################################
-# dst_zero(): Return signed zero according to sign of dst operand. #
-#########################################################################
- global dst_zero
-dst_zero:
- tst.b DST_EX(%a1) # get sign of dst operand
- bmi.b ld_mzero # if neg, load neg zero
- bra.b ld_pzero # load positive zero
-
-#########################################################################
-# src_inf(): Return signed inf according to sign of src operand. #
-#########################################################################
- global src_inf
-src_inf:
- tst.b SRC_EX(%a0) # get sign of src operand
- bmi.b ld_minf # if negative branch
-
-#
-# ld_pinf(): return a positive infinity.
-#
- global ld_pinf
-ld_pinf:
- fmov.s &0x7f800000,%fp0 # load +INF
- mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
- rts
-
-#
-# ld_minf(): return a negative infinity.
-#
- global ld_minf
-ld_minf:
- fmov.s &0xff800000,%fp0 # load -INF
- mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
- rts
-
-#########################################################################
-# dst_inf(): Return signed inf according to sign of dst operand. #
-#########################################################################
- global dst_inf
-dst_inf:
- tst.b DST_EX(%a1) # get sign of dst operand
- bmi.b ld_minf # if negative branch
- bra.b ld_pinf
-
- global szr_inf
-#################################################################
-# szr_inf(): Return +ZERO for a negative src operand or #
-# +INF for a positive src operand. #
-# Routine used for fetox, ftwotox, and ftentox. #
-#################################################################
-szr_inf:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_pzero
- bra.b ld_pinf
-
-#########################################################################
-# sopr_inf(): Return +INF for a positive src operand or #
-# jump to operand error routine for a negative src operand.
#
-# Routine used for flogn, flognp1, flog10, and flog2. #
-#########################################################################
- global sopr_inf
-sopr_inf:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.w t_operr
- bra.b ld_pinf
-
-#################################################################
-# setoxm1i(): Return minus one for a negative src operand or #
-# positive infinity for a positive src operand. #
-# Routine used for fetoxm1. #
-#################################################################
- global setoxm1i
-setoxm1i:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mone
- bra.b ld_pinf
-
-#########################################################################
-# src_one(): Return signed one according to sign of src operand. #
-#########################################################################
- global src_one
-src_one:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mone
-
-#
-# ld_pone(): return positive one.
-#
- global ld_pone
-ld_pone:
- fmov.s &0x3f800000,%fp0 # load +1
- clr.b FPSR_CC(%a6)
- rts
-
-#
-# ld_mone(): return negative one.
-#
- global ld_mone
-ld_mone:
- fmov.s &0xbf800000,%fp0 # load -1
- mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
- rts
-
-ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
-mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
-
-#################################################################
-# spi_2(): Return signed PI/2 according to sign of src operand. #
-#################################################################
- global spi_2
-spi_2:
- tst.b SRC_EX(%a0) # check sign of source
- bmi.b ld_mpi2
-
-#
-# ld_ppi2(): return positive PI/2.
-#
- global ld_ppi2
-ld_ppi2:
- fmov.l %d0,%fpcr
- fmov.x ppiby2(%pc),%fp0 # load +pi/2
- bra.w t_pinx2 # set INEX2
-
-#
-# ld_mpi2(): return negative PI/2.
-#
- global ld_mpi2
-ld_mpi2:
- fmov.l %d0,%fpcr
- fmov.x mpiby2(%pc),%fp0 # load -pi/2
- bra.w t_minx2 # set INEX2
-
-####################################################
-# The following routines give support for fsincos. #
-####################################################
-
-#
-# ssincosz(): When the src operand is ZERO, store a one in the
-# cosine register and return a ZERO in fp0 w/ the same sign
-# as the src operand.
-#
- global ssincosz
-ssincosz:
- fmov.s &0x3f800000,%fp1
- tst.b SRC_EX(%a0) # test sign
- bpl.b sincoszp
- fmov.s &0x80000000,%fp0 # return sin result in fp0
- mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
- rts
-sincoszp:
- fmov.s &0x00000000,%fp0 # return sin result in fp0
- mov.b &z_bmask,FPSR_CC(%a6)
- rts
-
-#
-# ssincosi(): When the src operand is INF, store a QNAN in the cosine
-# register and jump to the operand error routine.
-#
- global ssincosi
-ssincosi:
- fmov.x qnan(%pc),%fp1 # load NAN
- bra.w t_operr
-
-#
-# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
-# register and branch to the src QNAN routine.
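The loaders above (src_zero/src_inf/src_one, and the ssincosz zero case) are all one idiom: test the sign byte of the source operand and return a canned constant carrying that sign. In C the whole family collapses to copysign(); this is a sketch with hypothetical names, not code from the package (link with -lm):

    #include <math.h>
    #include <stdio.h>

    /* pick a canned magnitude whose sign follows the source operand */
    static double sign_dispatch(double src, double magnitude)
    {
        return copysign(magnitude, src);
    }

    int main(void)
    {
        printf("%g\n", sign_dispatch(-2.5, INFINITY)); /* -inf, cf. src_inf  */
        printf("%g\n", sign_dispatch(-2.5, 0.0));      /* -0,   cf. src_zero */
        /* the ssincosz case: sin(+-0) = +-0 while cos(+-0) = +1 */
        printf("sin=%g cos=%g\n", sin(-0.0), cos(-0.0));
        return 0;
    }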
-# - global ssincosqnan -ssincosqnan: - fmov.x LOCAL_EX(%a0),%fp1 - bra.w src_qnan - -######################################################################## - - global smod_sdnrm - global smod_snorm -smod_sdnrm: -smod_snorm: - mov.b DTAG(%a6),%d1 - beq.l smod - cmpi.b %d1,&ZERO - beq.w smod_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l smod - bra.l dst_qnan - - global smod_szero -smod_szero: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&ZERO - beq.l t_operr - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l t_operr - bra.l dst_qnan - - global smod_sinf -smod_sinf: - mov.b DTAG(%a6),%d1 - beq.l smod_fpn - cmpi.b %d1,&ZERO - beq.l smod_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l smod_fpn - bra.l dst_qnan - -smod_zro: -srem_zro: - mov.b SRC_EX(%a0),%d1 # get src sign - mov.b DST_EX(%a1),%d0 # get dst sign - eor.b %d0,%d1 # get qbyte sign - andi.b &0x80,%d1 - mov.b %d1,FPSR_QBYTE(%a6) - tst.b %d0 - bpl.w ld_pzero - bra.w ld_mzero - -smod_fpn: -srem_fpn: - clr.b FPSR_QBYTE(%a6) - mov.l %d0,-(%sp) - mov.b SRC_EX(%a0),%d1 # get src sign - mov.b DST_EX(%a1),%d0 # get dst sign - eor.b %d0,%d1 # get qbyte sign - andi.b &0x80,%d1 - mov.b %d1,FPSR_QBYTE(%a6) - cmpi.b DTAG(%a6),&DENORM - bne.b smod_nrm - lea DST(%a1),%a0 - mov.l (%sp)+,%d0 - bra t_resdnrm -smod_nrm: - fmov.l (%sp)+,%fpcr - fmov.x DST(%a1),%fp0 - tst.b DST_EX(%a1) - bmi.b smod_nrm_neg - rts - -smod_nrm_neg: - mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code - rts - -######################################################################### - global srem_snorm - global srem_sdnrm -srem_sdnrm: -srem_snorm: - mov.b DTAG(%a6),%d1 - beq.l srem - cmpi.b %d1,&ZERO - beq.w srem_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l srem - bra.l dst_qnan - - global srem_szero -srem_szero: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&ZERO - beq.l t_operr - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l t_operr - bra.l dst_qnan - - global srem_sinf -srem_sinf: - mov.b DTAG(%a6),%d1 - beq.w srem_fpn - cmpi.b %d1,&ZERO - beq.w srem_zro - cmpi.b %d1,&INF - beq.l t_operr - cmpi.b %d1,&DENORM - beq.l srem_fpn - bra.l dst_qnan - -######################################################################### - - global sscale_snorm - global sscale_sdnrm -sscale_snorm: -sscale_sdnrm: - mov.b DTAG(%a6),%d1 - beq.l sscale - cmpi.b %d1,&ZERO - beq.l dst_zero - cmpi.b %d1,&INF - beq.l dst_inf - cmpi.b %d1,&DENORM - beq.l sscale - bra.l dst_qnan - - global sscale_szero -sscale_szero: - mov.b DTAG(%a6),%d1 - beq.l sscale - cmpi.b %d1,&ZERO - beq.l dst_zero - cmpi.b %d1,&INF - beq.l dst_inf - cmpi.b %d1,&DENORM - beq.l sscale - bra.l dst_qnan - - global sscale_sinf -sscale_sinf: - mov.b DTAG(%a6),%d1 - beq.l t_operr - cmpi.b %d1,&QNAN - beq.l dst_qnan - bra.l t_operr - -######################################################################## - - global sop_sqnan -sop_sqnan: - mov.b DTAG(%a6),%d1 - cmpi.b %d1,&QNAN - beq.l dst_qnan - bra.l src_qnan - -######################################################################### -# norm(): normalize the mantissa of an extended precision input. the # -# input operand should not be normalized already. 
# -# # -# XDEF **************************************************************** # -# norm() # -# # -# XREF **************************************************************** # -# none # -# # -# INPUT *************************************************************** # -# a0 = pointer fp extended precision operand to normalize # -# # -# OUTPUT ************************************************************** # -# d0 = number of bit positions the mantissa was shifted # -# a0 = the input operand's mantissa is normalized; the exponent # -# is unchanged. # -# # -######################################################################### - global norm -norm: - mov.l %d2, -(%sp) # create some temp regs - mov.l %d3, -(%sp) - - mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) - mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa) - - bfffo %d0{&0:&32}, %d2 # how many places to shift? - beq.b norm_lo # hi(man) is all zeroes! - -norm_hi: - lsl.l %d2, %d0 # left shift hi(man) - bfextu %d1{&0:%d2}, %d3 # extract lo bits - - or.l %d3, %d0 # create hi(man) - lsl.l %d2, %d1 # create lo(man) - - mov.l %d0, FTEMP_HI(%a0) # store new hi(man) - mov.l %d1, FTEMP_LO(%a0) # store new lo(man) - - mov.l %d2, %d0 # return shift amount - - mov.l (%sp)+, %d3 # restore temp regs - mov.l (%sp)+, %d2 - - rts - -norm_lo: - bfffo %d1{&0:&32}, %d2 # how many places to shift? - lsl.l %d2, %d1 # shift lo(man) - add.l &32, %d2 # add 32 to shft amount - - mov.l %d1, FTEMP_HI(%a0) # store hi(man) - clr.l FTEMP_LO(%a0) # lo(man) is now zero - - mov.l %d2, %d0 # return shift amount - - mov.l (%sp)+, %d3 # restore temp regs - mov.l (%sp)+, %d2 - - rts - -######################################################################### -# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # -# - returns corresponding optype tag # -# # -# XDEF **************************************************************** # -# unnorm_fix() # -# # -# XREF **************************************************************** # -# norm() - normalize the mantissa # -# # -# INPUT *************************************************************** # -# a0 = pointer to unnormalized extended precision number # -# # -# OUTPUT ************************************************************** # -# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # -# a0 = input operand has been converted to a norm, denorm, or # -# zero; both the exponent and mantissa are changed. # -# # -######################################################################### - - global unnorm_fix -unnorm_fix: - bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? - bne.b unnorm_shift # hi(man) is not all zeroes - -# -# hi(man) is all zeroes so see if any bits in lo(man) are set -# -unnorm_chk_lo: - bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? - beq.w unnorm_zero # yes - - add.w &32, %d0 # no; fix shift distance - -# -# d0 = # shifts needed for complete normalization -# -unnorm_shift: - clr.l %d1 # clear top word - mov.w FTEMP_EX(%a0), %d1 # extract exponent - and.w &0x7fff, %d1 # strip off sgn - - cmp.w %d0, %d1 # will denorm push exp < 0? - bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0 - -# -# exponent would not go < 0. 
therefore, number stays normalized -# - sub.w %d0, %d1 # shift exponent value - mov.w FTEMP_EX(%a0), %d0 # load old exponent - and.w &0x8000, %d0 # save old sign - or.w %d0, %d1 # {sgn,new exp} - mov.w %d1, FTEMP_EX(%a0) # insert new exponent - - bsr.l norm # normalize UNNORM - - mov.b &NORM, %d0 # return new optype tag - rts - -# -# exponent would go < 0, so only denormalize until exp = 0 -# -unnorm_nrm_zero: - cmp.b %d1, &32 # is exp <= 32? - bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent - - bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man) - mov.l %d0, FTEMP_HI(%a0) # save new hi(man) - - mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) - lsl.l %d1, %d0 # extract new lo(man) - mov.l %d0, FTEMP_LO(%a0) # save new lo(man) - - and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 - - mov.b &DENORM, %d0 # return new optype tag - rts - -# -# only mantissa bits set are in lo(man) -# -unnorm_nrm_zero_lrg: - sub.w &32, %d1 # adjust shft amt by 32 - - mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) - lsl.l %d1, %d0 # left shift lo(man) - - mov.l %d0, FTEMP_HI(%a0) # store new hi(man) - clr.l FTEMP_LO(%a0) # lo(man) = 0 - - and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 - - mov.b &DENORM, %d0 # return new optype tag - rts - -# -# whole mantissa is zero so this UNNORM is actually a zero -# -unnorm_zero: - and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero - - mov.b &ZERO, %d0 # fix optype tag - rts diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S deleted file mode 100644 index 9bbffebe3eb504833ed0937670bd4168751d61a4..0000000000000000000000000000000000000000 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ /dev/null @@ -1,24785 +0,0 @@ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP -M68000 Hi-Performance Microprocessor Division -M68060 Software Package -Production Release P1.00 -- October 10, 1994 - -M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. - -THE SOFTWARE is provided on an "AS IS" basis and without warranty. -To the maximum extent permitted by applicable law, -MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, -INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE -and any warranty against infringement with regard to the SOFTWARE -(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. - -To the maximum extent permitted by applicable law, -IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER -(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, -BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) -ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. -Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. - -You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE -so long as this entire notice is retained without alteration in any modified and/or -redistributed versions, and that such modified versions are clearly identified as such. -No licenses are granted by implication, estoppel or otherwise under any patents -or trademarks of Motorola, Inc. -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# freal.s: -# This file is appended to the top of the 060FPSP package -# and contains the entry points into the package. The user, in -# effect, branches to one of the branch table entries located -# after _060FPSP_TABLE. 
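Each entry in that table is a 6-byte "bra.l" padded by a 2-byte "short 0x0000", so the entries sit a fixed 8 bytes apart and a kernel reaches handler i at table base + 8*i. A small C sketch of that addressing (the names and the example base address are hypothetical; the real base is wherever the OS links the package):

    #include <stdint.h>
    #include <stdio.h>

    #define FPSP_ENTRY_SIZE 8u  /* bra.l (6 bytes) + short pad (2 bytes) */

    /* hypothetical indices matching the order of the branch table */
    enum { FPSP_SNAN, FPSP_OPERR, FPSP_OVFL, FPSP_UNFL, FPSP_DZ, FPSP_INEX };

    static uintptr_t fpsp_entry(uintptr_t table_base, unsigned idx)
    {
        return table_base + (uintptr_t)idx * FPSP_ENTRY_SIZE;
    }

    int main(void)
    {
        uintptr_t base = 0x1000;    /* pretend load address */
        printf("dz entry at %#lx\n", (unsigned long)fpsp_entry(base, FPSP_DZ));
        return 0;
    }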
-# Also, subroutine stubs exist in this file (_fpsp_done for
-# example) that are referenced by the FPSP package itself in order
-# to call a given routine. The stub routine actually performs the
-# callout. The FPSP code does a "bsr" to the stub routine. This
-# extra layer of hierarchy adds a slight performance penalty but
-# it makes the FPSP code easier to read and more maintainable.
-#
-
-set _off_bsun, 0x00
-set _off_snan, 0x04
-set _off_operr, 0x08
-set _off_ovfl, 0x0c
-set _off_unfl, 0x10
-set _off_dz, 0x14
-set _off_inex, 0x18
-set _off_fline, 0x1c
-set _off_fpu_dis, 0x20
-set _off_trap, 0x24
-set _off_trace, 0x28
-set _off_access, 0x2c
-set _off_done, 0x30
-
-set _off_imr, 0x40
-set _off_dmr, 0x44
-set _off_dmw, 0x48
-set _off_irw, 0x4c
-set _off_irl, 0x50
-set _off_drb, 0x54
-set _off_drw, 0x58
-set _off_drl, 0x5c
-set _off_dwb, 0x60
-set _off_dww, 0x64
-set _off_dwl, 0x68
-
-_060FPSP_TABLE:
-
-###############################################################
-
-# Here's the table of ENTRY POINTS for those linking the package.
- bra.l _fpsp_snan
- short 0x0000
- bra.l _fpsp_operr
- short 0x0000
- bra.l _fpsp_ovfl
- short 0x0000
- bra.l _fpsp_unfl
- short 0x0000
- bra.l _fpsp_dz
- short 0x0000
- bra.l _fpsp_inex
- short 0x0000
- bra.l _fpsp_fline
- short 0x0000
- bra.l _fpsp_unsupp
- short 0x0000
- bra.l _fpsp_effadd
- short 0x0000
-
- space 56
-
-###############################################################
- global _fpsp_done
-_fpsp_done:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_ovfl
-_real_ovfl:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_unfl
-_real_unfl:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_inex
-_real_inex:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_bsun
-_real_bsun:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_operr
-_real_operr:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_snan
-_real_snan:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_dz
-_real_dz:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_fline
-_real_fline:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_fpu_disabled
-_real_fpu_disabled:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_trap
-_real_trap:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_trace
-_real_trace:
- mov.l %d0,-(%sp)
- mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
- pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
- mov.l 0x4(%sp),%d0
- rtd &0x4
-
- global _real_access
-_real_access: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - -####################################### - - global _imem_read -_imem_read: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read -_dmem_read: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write -_dmem_write: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _imem_read_word -_imem_read_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _imem_read_long -_imem_read_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_byte -_dmem_read_byte: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_word -_dmem_read_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_read_long -_dmem_read_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_byte -_dmem_write_byte: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_word -_dmem_write_word: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - - global _dmem_write_long -_dmem_write_long: - mov.l %d0,-(%sp) - mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 - pea.l (_060FPSP_TABLE-0x80,%pc,%d0) - mov.l 0x4(%sp),%d0 - rtd &0x4 - -# -# This file contains a set of define statements for constants -# in order to promote readability within the corecode itself. 
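One more note on the stubs above before the constants: each _real_*/_imem_*/_dmem_* stub fetches a 32-bit offset from the 0x80-byte vector that the host OS fills in immediately below _060FPSP_TABLE (indexed by the _off_* constants), adds it to that vector's base, and jumps there; the mov.l/pea.l/rtd sequence is just a jump that preserves d0. A C sketch of the lookup, with hypothetical names and the caveat that converting a data address to a function pointer is implementation-defined in C (on the 060 it is a plain address computation):

    #include <stdint.h>
    #include <string.h>

    typedef void (*fpsp_callout)(void);

    /* table_base points at _060FPSP_TABLE; off is e.g. _off_dz = 0x14 */
    static fpsp_callout fpsp_get_callout(uint8_t *table_base, uint32_t off)
    {
        uint8_t *vec = table_base - 0x80;      /* OS-filled offset vector */
        int32_t  rel;

        memcpy(&rel, vec + off, sizeof rel);   /* offset stored by the OS */
        return (fpsp_callout)(uintptr_t)(vec + rel); /* target = vec + rel */
    }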
-# - -set LOCAL_SIZE, 192 # stack frame size(bytes) -set LV, -LOCAL_SIZE # stack offset - -set EXC_SR, 0x4 # stack status register -set EXC_PC, 0x6 # stack pc -set EXC_VOFF, 0xa # stacked vector offset -set EXC_EA, 0xc # stacked - -set EXC_FP, 0x0 # frame pointer - -set EXC_AREGS, -68 # offset of all address regs -set EXC_DREGS, -100 # offset of all data regs -set EXC_FPREGS, -36 # offset of all fp regs - -set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 -set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 -set EXC_A5, EXC_AREGS+(5*4) -set EXC_A4, EXC_AREGS+(4*4) -set EXC_A3, EXC_AREGS+(3*4) -set EXC_A2, EXC_AREGS+(2*4) -set EXC_A1, EXC_AREGS+(1*4) -set EXC_A0, EXC_AREGS+(0*4) -set EXC_D7, EXC_DREGS+(7*4) -set EXC_D6, EXC_DREGS+(6*4) -set EXC_D5, EXC_DREGS+(5*4) -set EXC_D4, EXC_DREGS+(4*4) -set EXC_D3, EXC_DREGS+(3*4) -set EXC_D2, EXC_DREGS+(2*4) -set EXC_D1, EXC_DREGS+(1*4) -set EXC_D0, EXC_DREGS+(0*4) - -set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 -set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 -set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) - -set FP_SCR1, LV+80 # fp scratch 1 -set FP_SCR1_EX, FP_SCR1+0 -set FP_SCR1_SGN, FP_SCR1+2 -set FP_SCR1_HI, FP_SCR1+4 -set FP_SCR1_LO, FP_SCR1+8 - -set FP_SCR0, LV+68 # fp scratch 0 -set FP_SCR0_EX, FP_SCR0+0 -set FP_SCR0_SGN, FP_SCR0+2 -set FP_SCR0_HI, FP_SCR0+4 -set FP_SCR0_LO, FP_SCR0+8 - -set FP_DST, LV+56 # fp destination operand -set FP_DST_EX, FP_DST+0 -set FP_DST_SGN, FP_DST+2 -set FP_DST_HI, FP_DST+4 -set FP_DST_LO, FP_DST+8 - -set FP_SRC, LV+44 # fp source operand -set FP_SRC_EX, FP_SRC+0 -set FP_SRC_SGN, FP_SRC+2 -set FP_SRC_HI, FP_SRC+4 -set FP_SRC_LO, FP_SRC+8 - -set USER_FPIAR, LV+40 # FP instr address register - -set USER_FPSR, LV+36 # FP status register -set FPSR_CC, USER_FPSR+0 # FPSR condition codes -set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte -set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte -set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte - -set USER_FPCR, LV+32 # FP control register -set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable -set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control - -set L_SCR3, LV+28 # integer scratch 3 -set L_SCR2, LV+24 # integer scratch 2 -set L_SCR1, LV+20 # integer scratch 1 - -set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) - -set EXC_TEMP2, LV+24 # temporary space -set EXC_TEMP, LV+16 # temporary space - -set DTAG, LV+15 # destination operand type -set STAG, LV+14 # source operand type - -set SPCOND_FLG, LV+10 # flag: special case (see below) - -set EXC_CC, LV+8 # saved condition codes -set EXC_EXTWPTR, LV+4 # saved current PC (active) -set EXC_EXTWORD, LV+2 # saved extension word -set EXC_CMDREG, LV+2 # saved extension word -set EXC_OPWORD, LV+0 # saved operation word - -################################ - -# Helpful macros - -set FTEMP, 0 # offsets within an -set FTEMP_EX, 0 # extended precision -set FTEMP_SGN, 2 # value saved in memory. -set FTEMP_HI, 4 -set FTEMP_LO, 8 -set FTEMP_GRS, 12 - -set LOCAL, 0 # offsets within an -set LOCAL_EX, 0 # extended precision -set LOCAL_SGN, 2 # value saved in memory. -set LOCAL_HI, 4 -set LOCAL_LO, 8 -set LOCAL_GRS, 12 - -set DST, 0 # offsets within an -set DST_EX, 0 # extended precision -set DST_HI, 4 # value saved in memory. -set DST_LO, 8 - -set SRC, 0 # offsets within an -set SRC_EX, 0 # extended precision -set SRC_HI, 4 # value saved in memory. 
-set SRC_LO, 8 - -set SGL_LO, 0x3f81 # min sgl prec exponent -set SGL_HI, 0x407e # max sgl prec exponent -set DBL_LO, 0x3c01 # min dbl prec exponent -set DBL_HI, 0x43fe # max dbl prec exponent -set EXT_LO, 0x0 # min ext prec exponent -set EXT_HI, 0x7ffe # max ext prec exponent - -set EXT_BIAS, 0x3fff # extended precision bias -set SGL_BIAS, 0x007f # single precision bias -set DBL_BIAS, 0x03ff # double precision bias - -set NORM, 0x00 # operand type for STAG/DTAG -set ZERO, 0x01 # operand type for STAG/DTAG -set INF, 0x02 # operand type for STAG/DTAG -set QNAN, 0x03 # operand type for STAG/DTAG -set DENORM, 0x04 # operand type for STAG/DTAG -set SNAN, 0x05 # operand type for STAG/DTAG -set UNNORM, 0x06 # operand type for STAG/DTAG - -################## -# FPSR/FPCR bits # -################## -set neg_bit, 0x3 # negative result -set z_bit, 0x2 # zero result -set inf_bit, 0x1 # infinite result -set nan_bit, 0x0 # NAN result - -set q_sn_bit, 0x7 # sign bit of quotient byte - -set bsun_bit, 7 # branch on unordered -set snan_bit, 6 # signalling NAN -set operr_bit, 5 # operand error -set ovfl_bit, 4 # overflow -set unfl_bit, 3 # underflow -set dz_bit, 2 # divide by zero -set inex2_bit, 1 # inexact result 2 -set inex1_bit, 0 # inexact result 1 - -set aiop_bit, 7 # accrued inexact operation bit -set aovfl_bit, 6 # accrued overflow bit -set aunfl_bit, 5 # accrued underflow bit -set adz_bit, 4 # accrued dz bit -set ainex_bit, 3 # accrued inexact bit - -############################# -# FPSR individual bit masks # -############################# -set neg_mask, 0x08000000 # negative bit mask (lw) -set inf_mask, 0x02000000 # infinity bit mask (lw) -set z_mask, 0x04000000 # zero bit mask (lw) -set nan_mask, 0x01000000 # nan bit mask (lw) - -set neg_bmask, 0x08 # negative bit mask (byte) -set inf_bmask, 0x02 # infinity bit mask (byte) -set z_bmask, 0x04 # zero bit mask (byte) -set nan_bmask, 0x01 # nan bit mask (byte) - -set bsun_mask, 0x00008000 # bsun exception mask -set snan_mask, 0x00004000 # snan exception mask -set operr_mask, 0x00002000 # operr exception mask -set ovfl_mask, 0x00001000 # overflow exception mask -set unfl_mask, 0x00000800 # underflow exception mask -set dz_mask, 0x00000400 # dz exception mask -set inex2_mask, 0x00000200 # inex2 exception mask -set inex1_mask, 0x00000100 # inex1 exception mask - -set aiop_mask, 0x00000080 # accrued illegal operation -set aovfl_mask, 0x00000040 # accrued overflow -set aunfl_mask, 0x00000020 # accrued underflow -set adz_mask, 0x00000010 # accrued divide by zero -set ainex_mask, 0x00000008 # accrued inexact - -###################################### -# FPSR combinations used in the FPSP # -###################################### -set dzinf_mask, inf_mask+dz_mask+adz_mask -set opnan_mask, nan_mask+operr_mask+aiop_mask -set nzi_mask, 0x01ffffff #clears N, Z, and I -set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask -set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask -set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask -set inx1a_mask, inex1_mask+ainex_mask -set inx2a_mask, inex2_mask+ainex_mask -set snaniop_mask, nan_mask+snan_mask+aiop_mask -set snaniop2_mask, snan_mask+aiop_mask -set naniop_mask, nan_mask+aiop_mask -set neginf_mask, neg_mask+inf_mask -set infaiop_mask, inf_mask+aiop_mask -set negz_mask, neg_mask+z_mask -set opaop_mask, operr_mask+aiop_mask -set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask -set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask - -######### -# misc. 
#
-#########
-set rnd_stky_bit, 29 # stky bit pos in longword
-
-set sign_bit, 0x7 # sign bit
-set signan_bit, 0x6 # signalling nan bit
-
-set sgl_thresh, 0x3f81 # minimum sgl exponent
-set dbl_thresh, 0x3c01 # minimum dbl exponent
-
-set x_mode, 0x0 # extended precision
-set s_mode, 0x4 # single precision
-set d_mode, 0x8 # double precision
-
-set rn_mode, 0x0 # round-to-nearest
-set rz_mode, 0x1 # round-to-zero
-set rm_mode, 0x2 # round-to-minus-infinity
-set rp_mode, 0x3 # round-to-plus-infinity
-
-set mantissalen, 64 # length of mantissa in bits
-
-set BYTE, 1 # len(byte) == 1 byte
-set WORD, 2 # len(word) == 2 bytes
-set LONG, 4 # len(longword) == 4 bytes
-
-set BSUN_VEC, 0xc0 # bsun vector offset
-set INEX_VEC, 0xc4 # inexact vector offset
-set DZ_VEC, 0xc8 # dz vector offset
-set UNFL_VEC, 0xcc # unfl vector offset
-set OPERR_VEC, 0xd0 # operr vector offset
-set OVFL_VEC, 0xd4 # ovfl vector offset
-set SNAN_VEC, 0xd8 # snan vector offset
-
-###########################
-# SPecial CONDition FLaGs #
-###########################
-set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
-set fbsun_flg, 0x02 # flag bit: bsun exception
-set mia7_flg, 0x04 # flag bit: (a7)+
-set mda7_flg, 0x08 # flag bit: -(a7)
-set fmovm_flg, 0x40 # flag bit: fmovm instruction
-set immed_flg, 0x80 # flag bit: &<data>
-
-set ftrapcc_bit, 0x0
-set fbsun_bit, 0x1
-set mia7_bit, 0x2
-set mda7_bit, 0x3
-set immed_bit, 0x7
-
-##################################
-# TRANSCENDENTAL "LAST-OP" FLAGS #
-##################################
-set FMUL_OP, 0x0 # fmul instr performed last
-set FDIV_OP, 0x1 # fdiv performed last
-set FADD_OP, 0x2 # fadd performed last
-set FMOV_OP, 0x3 # fmov performed last
-
-#############
-# CONSTANTS #
-#############
-T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
-T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
-
-PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
-PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
-
-TWOBYPI:
- long 0x3FE45F30,0x6DC9C883
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Overflow exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
-# _fpsp_done() - "callout" for 060FPSP exit (all work done!)
#
-# _real_ovfl() - "callout" for Overflow exception enabled code #
-# _real_inex() - "callout" for Inexact exception enabled code #
-# _real_trace() - "callout" for Trace exception code #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the FP Ovfl exception stack frame #
-# - The fsave frame contains the source operand #
-# #
-# OUTPUT ************************************************************** #
-# Overflow Exception enabled: #
-# - The system stack is unchanged #
-# - The fsave frame contains the adjusted src op for opclass 0,2 #
-# Overflow Exception disabled: #
-# - The system stack is unchanged #
-# - The "exception present" flag in the fsave frame is cleared #
-# #
-# ALGORITHM *********************************************************** #
-# On the 060, if an FP overflow is present as the result of any #
-# instruction, the 060 will take an overflow exception whether the #
-# exception is enabled or disabled in the FPCR. For the disabled case, #
-# this handler emulates the instruction to determine what the correct #
-# default result should be for the operation. This default result is #
-# then stored in either the FP regfile, data regfile, or memory. #
-# Finally, the handler exits through the "callout" _fpsp_done() #
-# denoting that no exceptional conditions exist within the machine. #
-# If the exception is enabled, then this handler must create the #
-# exceptional operand and place it in the fsave state frame, and store #
-# the default result (only if the instruction is opclass 3). For #
-# exceptions enabled, this handler must exit through the "callout" #
-# _real_ovfl() so that the operating system enabled overflow handler #
-# can handle this case. #
-# Two other conditions exist. First, if overflow was disabled #
-# but the inexact exception was enabled, this handler must exit #
-# through the "callout" _real_inex() regardless of whether the result #
-# was inexact. #
-# Also, in the case of an opclass three instruction where #
-# overflow was disabled and the trace exception was enabled, this #
-# handler must exit through the "callout" _real_trace(). #
-# #
-#########################################################################
-
- global _fpsp_ovfl
-_fpsp_ovfl:
-
-#$# sub.l &24,%sp # make room for src/dst
-
- link.w %a6,&-LOCAL_SIZE # init stack frame
-
- fsave FP_SRC(%a6) # grab the "busy" frame
-
- movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
- fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
- fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
-
-# the FPIAR holds the "current PC" of the faulting instruction
- mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
- mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
- addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
- bsr.l _imem_read_long # fetch the instruction words
- mov.l %d0,EXC_OPWORD(%a6)
-
-##############################################################################
-
- btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
- bne.w fovfl_out
-
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l fix_skewed_ops # fix src op
-
-# since, I believe, only NORMs and DENORMs can come through here,
-# maybe we can avoid the subroutine call.
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l set_tag_x # tag the operand type
- mov.b %d0,STAG(%a6) # maybe NORM,DENORM
-
-# bit five of the fp extension word separates the monadic and dyadic operations
-# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
-# will never take this exception.
- btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
- beq.b fovfl_extract # monadic
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
- bsr.l load_fpn2 # load dst into FP_DST
-
- lea FP_DST(%a6),%a0 # pass: ptr to dst op
- bsr.l set_tag_x # tag the operand type
- cmpi.b %d0,&UNNORM # is operand an UNNORM?
- bne.b fovfl_op2_done # no
- bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
-fovfl_op2_done:
- mov.b %d0,DTAG(%a6) # save dst optype tag
-
-fovfl_extract:
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
-#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
-#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- mov.b 1+EXC_CMDREG(%a6),%d1
- andi.w &0x007f,%d1 # extract extension
-
- andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0
- lea FP_DST(%a6),%a1
-
-# maybe we can make these entry points ONLY the OVFL entry points of each routine.
- mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
- jsr (tbl_unsupp.l,%pc,%d1.l*1)
-
-# the operation has been emulated. the result is in fp0.
-# the EXOP, if an exception occurred, is in fp1.
-# we must save the default result regardless of whether
-# traps are enabled or disabled.
- bfextu EXC_CMDREG(%a6){&6:&3},%d0
- bsr.l store_fpreg
-
-# the exceptional possibilities we have left ourselves with are ONLY overflow
-# and inexact. and, the inexact is such that overflow occurred and was disabled
-# but inexact was enabled.
- btst &ovfl_bit,FPCR_ENABLE(%a6)
- bne.b fovfl_ovfl_on
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.b fovfl_inex_on
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
- bra.l _fpsp_done
-
-# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
-# in fp1. now, simply jump to _real_ovfl()!
-fovfl_ovfl_on:
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.w &0xe005,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_ovfl
-
-# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
-# we must jump to real_inex().
-fovfl_inex_on:
-
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
- mov.w &0xe001,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_inex
-
-########################################################################
-fovfl_out:
-
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-
-# the src operand is definitely a NORM(!), so tag it as such
- mov.b &NORM,STAG(%a6) # set src optype tag
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0 # pass ptr to src operand
-
- bsr.l fout
-
- btst &ovfl_bit,FPCR_ENABLE(%a6)
- bne.w fovfl_ovfl_on
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.w fovfl_inex_on
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
-
- btst &0x7,(%sp) # is trace on?
- beq.l _fpsp_done # no
-
- fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
- mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
- bra.l _real_trace
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Underflow exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
-# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
-# _real_unfl() - "callout" for Underflow exception enabled code #
-# _real_inex() - "callout" for Inexact exception enabled code #
-# _real_trace() - "callout" for Trace exception code #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the FP Unfl exception stack frame #
-# - The fsave frame contains the source operand #
-# #
-# OUTPUT ************************************************************** #
-# Underflow Exception enabled: #
-# - The system stack is unchanged #
-# - The fsave frame contains the adjusted src op for opclass 0,2 #
-# Underflow Exception disabled: #
-# - The system stack is unchanged #
-# - The "exception present" flag in the fsave frame is cleared #
-# #
-# ALGORITHM *********************************************************** #
-# On the 060, if an FP underflow is present as the result of any #
-# instruction, the 060 will take an underflow exception whether the #
-# exception is enabled or disabled in the FPCR. For the disabled case, #
-# this handler emulates the instruction to determine what the correct #
-# default result should be for the operation. This default result is #
-# then stored in either the FP regfile, data regfile, or memory. #
-# Finally, the handler exits through the "callout" _fpsp_done() #
-# denoting that no exceptional conditions exist within the machine.
#
-# If the exception is enabled, then this handler must create the #
-# exceptional operand and place it in the fsave state frame, and store #
-# the default result (only if the instruction is opclass 3). For #
-# exceptions enabled, this handler must exit through the "callout" #
-# _real_unfl() so that the operating system enabled underflow handler #
-# can handle this case. #
-# Two other conditions exist. First, if underflow was disabled #
-# but the inexact exception was enabled and the result was inexact, #
-# this handler must exit through the "callout" _real_inex(). #
-# Also, in the case of an opclass three instruction where #
-# underflow was disabled and the trace exception was enabled, this #
-# handler must exit through the "callout" _real_trace(). #
-# #
-#########################################################################
-
- global _fpsp_unfl
-_fpsp_unfl:
-
-#$# sub.l &24,%sp # make room for src/dst
-
- link.w %a6,&-LOCAL_SIZE # init stack frame
-
- fsave FP_SRC(%a6) # grab the "busy" frame
-
- movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
- fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
- fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
-
-# the FPIAR holds the "current PC" of the faulting instruction
- mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
- mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
- addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
- bsr.l _imem_read_long # fetch the instruction words
- mov.l %d0,EXC_OPWORD(%a6)
-
-##############################################################################
-
- btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
- bne.w funfl_out
-
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l fix_skewed_ops # fix src op
-
- lea FP_SRC(%a6),%a0 # pass: ptr to src op
- bsr.l set_tag_x # tag the operand type
- mov.b %d0,STAG(%a6) # maybe NORM,DENORM
-
-# bit five of the fp ext word separates the monadic and dyadic operations
-# that can pass through fpsp_unfl(). remember that fcmp, and ftst
-# will never take this exception.
- btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
- beq.b funfl_extract # monadic
-
-# now, what's left that's not dyadic is fsincos. we can distinguish it
-# from all dyadics by the '0110xxx pattern
- btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
- bne.b funfl_extract # yes
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
- bsr.l load_fpn2 # load dst into FP_DST
-
- lea FP_DST(%a6),%a0 # pass: ptr to dst op
- bsr.l set_tag_x # tag the operand type
- cmpi.b %d0,&UNNORM # is operand an UNNORM?
- bne.b funfl_op2_done # no
- bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
-funfl_op2_done:
- mov.b %d0,DTAG(%a6) # save dst optype tag
-
-funfl_extract:
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
-#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
-#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- mov.b 1+EXC_CMDREG(%a6),%d1
- andi.w &0x007f,%d1 # extract extension
-
- andi.l &0x00ff01ff,USER_FPSR(%a6)
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0
- lea FP_DST(%a6),%a1
-
-# maybe we can make these entry points ONLY the UNFL entry points of each routine.
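The two instructions that follow are the entire dispatch: the 7-bit extension field in d1 indexes tbl_unsupp, whose 32-bit entries are each routine's offset from the table itself, and the jsr lands at table + offset. The C equivalent is a function-pointer table indexed by the masked extension word; this is a sketch with hypothetical routine names (the 0x0e/0x1d slots follow the 68881 fsin/fcos extension encodings):

    #include <stdio.h>

    typedef void (*emul_fn)(const void *src, void *dst);

    static void emul_fsin(const void *src, void *dst)
    { (void)src; (void)dst; puts("fsin emulated"); }
    static void emul_fcos(const void *src, void *dst)
    { (void)src; (void)dst; puts("fcos emulated"); }

    /* one slot per 7-bit instruction extension, like tbl_unsupp */
    static const emul_fn tbl_unsupp_c[0x80] = {
        [0x0e] = emul_fsin,
        [0x1d] = emul_fcos,
    };

    static void dispatch(unsigned ext, const void *src, void *dst)
    {
        emul_fn fn = tbl_unsupp_c[ext & 0x7f]; /* cf. andi.w &0x007f,%d1 */
        if (fn)
            fn(src, dst);
    }

    int main(void) { dispatch(0x0e, 0, 0); return 0; }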
- mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
- jsr (tbl_unsupp.l,%pc,%d1.l*1)
-
- bfextu EXC_CMDREG(%a6){&6:&3},%d0
- bsr.l store_fpreg
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception. Since this is incorrect, we need to check
-# if our emulation, after re-doing the operation, decided that
-# no underflow was called for. We do these checks only in
-# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
-# special case will simply exit gracefully with the correct result.
-
-# the exceptional possibilities we have left ourselves with are ONLY underflow
-# and inexact. and, the inexact is such that underflow occurred and was disabled
-# but inexact was enabled.
- btst &unfl_bit,FPCR_ENABLE(%a6)
- bne.b funfl_unfl_on
-
-funfl_chkinex:
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.b funfl_inex_on
-
-funfl_exit:
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
- bra.l _fpsp_done
-
-# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
-# in fp1 (don't forget to save fp0). what to do now?
-# well, we simply have to go to _real_unfl()!
-funfl_unfl_on:
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception. Since this is incorrect, we check here to see
-# if our emulation, after re-doing the operation, decided that
-# no underflow was called for.
- btst &unfl_bit,FPSR_EXCEPT(%a6)
- beq.w funfl_chkinex
-
-funfl_unfl_on2:
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
-
- mov.w &0xe003,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_unfl
-
-# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
-# we must jump to real_inex().
-funfl_inex_on:
-
-# The `060 FPU multiplier hardware is such that if the result of a
-# multiply operation is the smallest possible normalized number
-# (0x00000000_80000000_00000000), then the machine will take an
-# underflow exception.
-# But, whether bogus or not, if inexact is enabled AND it occurred,
-# then we have to branch to real_inex.
-
- btst &inex2_bit,FPSR_EXCEPT(%a6)
- beq.w funfl_exit
-
-funfl_inex_on2:
-
- fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
-
- mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
- mov.w &0xe001,2+FP_SRC(%a6) # save exc status
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- frestore FP_SRC(%a6) # do this after fmovm,other fs!
-
- unlk %a6
-
- bra.l _real_inex
-
-#######################################################################
-funfl_out:
-
-
-#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
-#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
-#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
-
-# the src operand is definitely a NORM(!), so tag it as such
- mov.b &NORM,STAG(%a6) # set src optype tag
-
- clr.l %d0
- mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
-
- and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
-
- fmov.l &0x0,%fpcr # zero current control regs
- fmov.l &0x0,%fpsr
-
- lea FP_SRC(%a6),%a0 # pass ptr to src operand
-
- bsr.l fout
-
- btst &unfl_bit,FPCR_ENABLE(%a6)
- bne.w funfl_unfl_on2
-
- btst &inex2_bit,FPCR_ENABLE(%a6)
- bne.w funfl_inex_on2
-
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
- fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
- movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
-
- unlk %a6
-#$# add.l &24,%sp
-
- btst &0x7,(%sp) # is trace on?
- beq.l _fpsp_done # no
-
- fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
- mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
- bra.l _real_trace
-
-#########################################################################
-# XDEF **************************************************************** #
-# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
-# Data Type" exception. #
-# #
-# This handler should be the first code executed upon taking the #
-# FP Unimplemented Data Type exception in an operating system. #
-# #
-# XREF **************************************************************** #
-# _imem_read_{word,long}() - read instruction word/longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# load_fpn1() - load src operand from FP regfile #
-# fout() - emulate an opclass 3 instruction #
-# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
-# _real_inex() - "callout" to operating system inexact handler #
-# _fpsp_done() - "callout" for exit; work all done #
-# _real_trace() - "callout" for Trace enabled exception #
-# funimp_skew() - adjust fsave src ops to "incorrect" value #
-# _real_snan() - "callout" for SNAN exception #
-# _real_operr() - "callout" for OPERR exception #
-# _real_ovfl() - "callout" for OVFL exception #
-# _real_unfl() - "callout" for UNFL exception #
-# get_packed() - fetch packed operand from memory #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the "Unimp Data Type" stk frame #
-# - The fsave frame contains the src op (for UNNORM/DENORM) #
-# #
-# OUTPUT ************************************************************** #
-# If Inexact exception (opclass 3): #
-# - The system stack is changed to an Inexact exception stk frame #
-# If SNAN exception (opclass 3): #
-# - The system stack is changed to an SNAN exception stk frame #
-# If OPERR exception (opclass 3): #
-# - The system stack is changed to an OPERR exception stk frame #
-# If OVFL exception (opclass 3): #
-# - The system stack is changed to an OVFL exception stk frame #
-# If UNFL exception (opclass 3): #
-# - The system stack is changed to an UNFL exception stack frame #
-# If Trace exception enabled: #
-# - The system stack is changed to a Trace exception stack frame #
-# Else: (normal case) #
-# - 
Correct result has been stored as appropriate # -# # -# ALGORITHM *********************************************************** # -# Two main instruction types can enter here: (1) DENORM or UNNORM # -# unimplemented data types. These can be either opclass 0,2 or 3 # -# instructions, and (2) PACKED unimplemented data format instructions # -# also of opclasses 0,2, or 3. # -# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src # -# operand from the fsave state frame and the dst operand (if dyadic) # -# from the FP register file. The instruction is then emulated by # -# choosing an emulation routine from a table of routines indexed by # -# instruction type. Once the instruction has been emulated and result # -# saved, then we check to see if any enabled exceptions resulted from # -# instruction emulation. If none, then we exit through the "callout" # -# _fpsp_done(). If there is an enabled FP exception, then we insert # -# this exception into the FPU in the fsave state frame and then exit # -# through _fpsp_done(). # -# PACKED opclass 0 and 2 is similar in how the instruction is # -# emulated and exceptions handled. The differences occur in how the # -# handler loads the packed op (by calling get_packed() routine) and # -# by the fact that a Trace exception could be pending for PACKED ops. # -# If a Trace exception is pending, then the current exception stack # -# frame is changed to a Trace exception stack frame and an exit is # -# made through _real_trace(). # -# For UNNORM/DENORM opclass 3, the actual move out to memory is # -# performed by calling the routine fout(). If no exception should occur # -# as the result of emulation, then an exit either occurs through # -# _fpsp_done() or through _real_trace() if a Trace exception is pending # -# (a Trace stack frame must be created here, too). If an FP exception # -# should occur, then we must create an exception stack frame of that # -# type and jump to either _real_snan(), _real_operr(), _real_inex(), # -# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 # -# emulation is performed in a similar manner. # -# # -######################################################################### - -# -# (1) DENORM and UNNORM (unimplemented) data types: -# -# post-instruction -# ***************** -# * EA * -# pre-instruction * * -# ***************** ***************** -# * 0x0 * 0x0dc * * 0x3 * 0x0dc * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# -# (2) PACKED format (unsupported) opclasses two and three: -# ***************** -# * EA * -# * * -# ***************** -# * 0x2 * 0x0dc * -# ***************** -# * Next * -# * PC * -# ***************** -# * SR * -# ***************** -# - global _fpsp_unsupp -_fpsp_unsupp: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # save fp state - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - - btst &0x5,EXC_SR(%a6) # user or supervisor mode? - bne.b fu_s -fu_u: - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # save on stack - bra.b fu_cont -# if the exception is an opclass zero or two unimplemented data type -# exception, then the a7' calculated here is wrong since it doesn't -# stack an ea. however, we don't need an a7' for this case anyways. 
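-
-# in short: in user mode the instruction's a7 is the USP, so that is what
-# gets snapshotted into EXC_A7; in supervisor mode a7 is the system stack
-# pointer itself, taken here as the address just past the stacked ea field.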
-fu_s: - lea 0x4+EXC_EA(%a6),%a0 # load old a7' - mov.l %a0,EXC_A7(%a6) # save on stack - -fu_cont: - -# the FPIAR holds the "current PC" of the faulting instruction -# the FPIAR should be set correctly for ALL exceptions passing through -# this point. - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - -############################ - - clr.b SPCOND_FLG(%a6) # clear special condition flag - -# Separate opclass three (fpn-to-mem) ops since they have a different -# stack frame and protocol. - btst &0x5,EXC_CMDREG(%a6) # is it an fmove out? - bne.w fu_out # yes - -# Separate packed opclass two instructions. - bfextu EXC_CMDREG(%a6){&0:&6},%d0 - cmpi.b %d0,&0x13 - beq.w fu_in_pack - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field - andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - -# Opclass two w/ memory-to-fpn operation will have an incorrect extended -# precision format if the src format was single or double and the -# source data type was an INF, NAN, DENORM, or UNNORM - lea FP_SRC(%a6),%a0 # pass ptr to input - bsr.l fix_skewed_ops - -# we don't know whether the src operand or the dst operand (or both) is the -# UNNORM or DENORM. call the function that tags the operand type. if the -# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2 # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO - -fu_op2: - mov.b %d0,STAG(%a6) # save src optype tag - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - -# bit five of the fp extension word separates the monadic and dyadic operations -# at this point - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b fu_extract # monadic - cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? - beq.b fu_extract # yes, so it's monadic, too - - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_done # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -fu_op2_done: - mov.b %d0,DTAG(%a6) # save dst optype tag - -fu_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all dyadic ops -# OPERR : fsqrt(-NORM) -# OVFL : all except ftst,fcmp -# UNFL : all except ftst,fcmp -# DZ : fdiv -# INEX2 : all except ftst,fcmp -# INEX1 : none (packed doesn't go through here) -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set - bne.b fu_in_ena # some are enabled - -fu_in_cont: -# fcmp and ftst do not store any result. - mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension - andi.b &0x38,%d0 # extract bits 3-5 - cmpi.b %d0,&0x38 # is instr fcmp or ftst? 
- beq.b fu_in_exit # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l store_fpreg # store the result - -fu_in_exit: - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - bra.l _fpsp_done - -fu_in_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_in_exc # there is at least one set - -# -# No exceptions occurred that were also enabled. Now: -# -# if (OVFL && ovfl_disabled && inexact_enabled) { -# branch to _real_inex() (even if the result was exact!); -# } else { -# save the result in the proper fp reg (unless the op is fcmp or ftst); -# return; -# } -# - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.b fu_in_cont # no - -fu_in_ovflchk: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.b fu_in_cont # no - bra.w fu_in_exc_ovfl # go insert overflow frame - -# -# An exception occurred and that exception was enabled: -# -# shift enabled exception field into lo byte of d0; -# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || -# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { -# /* -# * this is the case where we must call _real_inex() now or else -# * there will be no other way to pass it the exceptional operand -# */ -# call _real_inex(); -# } else { -# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; -# } -# -fu_in_exc: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? (6) - bne.b fu_in_exc_exit # no - -# the enabled exception was inexact - btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? - bne.w fu_in_exc_unfl # yes - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? - bne.w fu_in_exc_ovfl # yes - -# here, we insert the correct fsave status value into the fsave frame for the -# corresponding exception. the operand in the fsave frame should be the original -# src operand. -fu_in_exc_exit: - mov.l %d0,-(%sp) # save d0 - bsr.l funimp_skew # skew sgl or dbl inputs - mov.l (%sp)+,%d0 # restore d0 - - mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 - - bra.l _fpsp_done - -tbl_except: - short 0xe000,0xe006,0xe004,0xe005 - short 0xe003,0xe002,0xe001,0xe001 - -fu_in_exc_unfl: - mov.w &0x4,%d0 - bra.b fu_in_exc_exit -fu_in_exc_ovfl: - mov.w &0x03,%d0 - bra.b fu_in_exc_exit - -# If the input operand to this operation was opclass two and a single -# or double precision denorm, inf, or nan, the operand needs to be -# "corrected" in order to have the proper equivalent extended precision -# number. - global fix_skewed_ops -fix_skewed_ops: - bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt - cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl? - beq.b fso_sgl # yes - cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl? - beq.b fso_dbl # yes - rts # no - -fso_sgl: - mov.w LOCAL_EX(%a0),%d0 # fetch src exponent - andi.w &0x7fff,%d0 # strip sign - cmpi.w %d0,&0x3f80 # is |exp| == $3f80? - beq.b fso_sgl_dnrm_zero # yes - cmpi.w %d0,&0x407f # no; is |exp| == $407f? 
- beq.b fso_infnan # yes - rts # no - -fso_sgl_dnrm_zero: - andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit - beq.b fso_zero # it's a skewed zero -fso_sgl_dnrm: -# here, we count on norm not to alter a0... - bsr.l norm # normalize mantissa - neg.w %d0 # -shft amt - addi.w &0x3f81,%d0 # adjust new exponent - andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent - or.w %d0,LOCAL_EX(%a0) # insert new exponent - rts - -fso_zero: - andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent - rts - -fso_infnan: - andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit - ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff - rts - -fso_dbl: - mov.w LOCAL_EX(%a0),%d0 # fetch src exponent - andi.w &0x7fff,%d0 # strip sign - cmpi.w %d0,&0x3c00 # is |exp| == $3c00? - beq.b fso_dbl_dnrm_zero # yes - cmpi.w %d0,&0x43ff # no; is |exp| == $43ff? - beq.b fso_infnan # yes - rts # no - -fso_dbl_dnrm_zero: - andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit - bne.b fso_dbl_dnrm # it's a skewed denorm - tst.l LOCAL_LO(%a0) # is it a zero? - beq.b fso_zero # yes -fso_dbl_dnrm: -# here, we count on norm not to alter a0... - bsr.l norm # normalize mantissa - neg.w %d0 # -shft amt - addi.w &0x3c01,%d0 # adjust new exponent - andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent - or.w %d0,LOCAL_EX(%a0) # insert new exponent - rts - -################################################################# - -# fmove out took an unimplemented data type exception. -# the src operand is in FP_SRC. Call _fout() to write out the result and -# to determine which exceptions, if any, to take. -fu_out: - -# Separate packed move outs from the UNNORM and DENORM move outs. - bfextu EXC_CMDREG(%a6){&3:&3},%d0 - cmpi.b %d0,&0x3 - beq.w fu_out_pack - cmpi.b %d0,&0x7 - beq.w fu_out_pack - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field. -# fmove out doesn't affect ccodes. - and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - -# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine -# call here. just figure out what it is... - mov.w FP_SRC_EX(%a6),%d0 # get exponent - andi.w &0x7fff,%d0 # strip sign - beq.b fu_out_denorm # it's a DENORM - - lea FP_SRC(%a6),%a0 - bsr.l unnorm_fix # yes; fix it - - mov.b %d0,STAG(%a6) - - bra.b fu_out_cont -fu_out_denorm: - mov.b &DENORM,STAG(%a6) -fu_out_cont: - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - mov.l (%a6),EXC_A6(%a6) # in case a6 changes - bsr.l fout # call fmove out routine - -# Exceptions in order of precedence: -# BSUN : none -# SNAN : none -# OPERR : fmove.{b,w,l} out of large UNNORM -# OVFL : fmove.{s,d} -# UNFL : fmove.{s,d,x} -# DZ : none -# INEX2 : all -# INEX1 : none (packed doesn't travel through here) - -# determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_out_ena # some are enabled - -fu_out_done: - - mov.l EXC_A6(%a6),(%a6) # in case a6 changed - -# on extended precision opclass three instructions using pre-decrement or -# post-increment addressing mode, the address register is not updated. is the -# address register was the stack pointer used from user mode, then let's update -# it here. if it was used from supervisor mode, then we have to handle this -# as a special case. 
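-
-# the decision made below, as a C-style sketch (hypothetical names, not
-# the real frame layout):
-#
-#	if (!supervisor)
-#		usp = exc_a7;		/* just write the updated a7 back */
-#	else if (spcond_flg == MDA7)	/* "fmov.x fpN,-(a7)" from super */
-#		shift_frame_down_12();	/* make room for the result */
-#	/* else: no stack pointer fixup is needed */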
- btst &0x5,EXC_SR(%a6) - bne.b fu_out_done_s - - mov.l EXC_A7(%a6),%a0 # restore a7 - mov.l %a0,%usp - -fu_out_done_cont: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - btst &0x7,(%sp) # is trace on? - bne.b fu_out_trace # yes - - bra.l _fpsp_done - -# is the ea mode pre-decrement of the stack pointer from supervisor mode? -# ("fmov.x fpm,-(a7)") if so, -fu_out_done_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.b fu_out_done_cont - -# the extended precision result is still in fp0. but, we need to save it -# somewhere on the stack until we can copy it to its final resting place. -# here, we're counting on the top of the stack to be the old place-holders -# for fp0/fp1 which have already been restored. that way, we can write -# over those destinations with the shifted stack frame. - fmovm.x &0x80,FP_SRC(%a6) # put answer on stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - btst &0x7,(%sp) - bne.b fu_out_trace - - bra.l _fpsp_done - -fu_out_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_out_exc # there is at least one set - -# no exceptions were set. -# if a disabled overflow occurred and inexact was enabled but the result -# was exact, then a branch to _real_inex() is made. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.w fu_out_done # no - -fu_out_ovflchk: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.w fu_out_done # no - bra.w fu_inex # yes - -# -# The fp move out that took the "Unimplemented Data Type" exception was -# being traced. Since the stack frames are similar, get the "current" PC -# from FPIAR and put it in the trace stack frame then jump to _real_trace(). -# -# UNSUPP FRAME TRACE FRAME -# ***************** ***************** -# * EA * * Current * -# * * * PC * -# ***************** ***************** -# * 0x3 * 0x0dc * * 0x2 * 0x024 * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# -fu_out_trace: - mov.w &0x2024,0x6(%sp) - fmov.l %fpiar,0x8(%sp) - bra.l _real_trace - -# an exception occurred and that exception was enabled. -fu_out_exc: - subi.l &24,%d0 # fix offset to be 0-8 - -# we don't mess with the existing fsave frame. just re-insert it and -# jump to the "_real_{}()" handler... 
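-
-# the jump below uses the same pc-relative trick as tbl_unsupp, but with
-# 16-bit entries: each short holds "handler - tbl_fu_out", and the jmp
-# adds the fetched word back to the table base. as a sketch:
-#
-#	short tbl[8];			/* entry = handler - tbl */
-#	int idx = prio;			/* 0-7, from the bfffo above */
-#	void (*h)(void) = (void (*)(void))((char *)tbl + tbl[idx]);
-#	h();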
- mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 - jmp (tbl_fu_out.b,%pc,%d0.w*1) - - swbeg &0x8 -tbl_fu_out: - short tbl_fu_out - tbl_fu_out # BSUN can't happen - short tbl_fu_out - tbl_fu_out # SNAN can't happen - short fu_operr - tbl_fu_out # OPERR - short fu_ovfl - tbl_fu_out # OVFL - short fu_unfl - tbl_fu_out # UNFL - short tbl_fu_out - tbl_fu_out # DZ can't happen - short fu_inex - tbl_fu_out # INEX2 - short tbl_fu_out - tbl_fu_out # INEX1 won't make it here - -# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just -# frestore it. -fu_snan: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 - mov.w &0xe006,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) - - unlk %a6 - - - bra.l _real_snan - -fu_operr: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe004,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) - - unlk %a6 - - - bra.l _real_operr - -fu_ovfl: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4 - mov.w &0xe005,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - bra.l _real_ovfl - -# underflow can happen for extended precision. extended precision opclass -# three instruction exceptions don't update the stack pointer. so, if the -# exception occurred from user mode, then simply update a7 and exit normally. -# if the exception occurred from supervisor mode, check if -fu_unfl: - mov.l EXC_A6(%a6),(%a6) # restore a6 - - btst &0x5,EXC_SR(%a6) - bne.w fu_unfl_s - - mov.l EXC_A7(%a6),%a0 # restore a7 whether we need - mov.l %a0,%usp # to or not... - -fu_unfl_cont: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc - mov.w &0xe003,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - bra.l _real_unfl - -fu_unfl_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the mode -(sp)? - bne.b fu_unfl_cont - -# the extended precision result is still in fp0. but, we need to save it -# somewhere on the stack until we can copy it to its final resting place -# (where the exc frame is currently). make sure it's not at the top of the -# frame or it will get overwritten when the exc stack frame is shifted "down". 
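-
-# what follows slides the exception frame 12 bytes toward lower addresses
-# and writes the 12-byte extended result where the frame used to sit,
-# completing the -(a7) store the hardware never performed. in C-like
-# terms (hypothetical names):
-#
-#	memmove(frame - 12, frame, frame_size);	/* shift frame "down" */
-#	memcpy(frame, &result, 12);	/* result lands in the old slot */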
- fmovm.x &0x80,FP_SRC(%a6) # put answer on stack - fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc - mov.w &0xe003,2+FP_DST(%a6) - - frestore FP_DST(%a6) # restore EXOP - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - bra.l _real_unfl - -# fmove in and out enter here. -fu_inex: - fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 - mov.w &0xe001,2+FP_SRC(%a6) - - frestore FP_SRC(%a6) # restore EXOP - - unlk %a6 - - - bra.l _real_inex - -######################################################################### -######################################################################### -fu_in_pack: - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field - andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bsr.l get_packed # fetch packed src operand - - lea FP_SRC(%a6),%a0 # pass ptr to src - bsr.l set_tag_x # set src optype tag - - mov.b %d0,STAG(%a6) # save src optype tag - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - -# bit five of the fp extension word separates the monadic and dyadic operations -# at this point - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b fu_extract_p # monadic - cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst? - beq.b fu_extract_p # yes, so it's monadic, too - - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_done_p # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -fu_op2_done_p: - mov.b %d0,DTAG(%a6) # save dst optype tag - -fu_extract_p: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all dyadic ops -# OPERR : fsqrt(-NORM) -# OVFL : all except ftst,fcmp -# UNFL : all except ftst,fcmp -# DZ : fdiv -# INEX2 : all except ftst,fcmp -# INEX1 : all -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_in_ena_p # some are enabled - -fu_in_cont_p: -# fcmp and ftst do not store any result. 
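-
-# note: the fcmp/ftst check below keys off extension-word opcode bits 3-5
-# all being set; masking with 0x38 and comparing to 0x38 catches those
-# two, and they are exactly the ops that must not write a result back.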
- mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension - andi.b &0x38,%d0 # extract bits 3-5 - cmpi.b %d0,&0x38 # is instr fcmp or ftst? - beq.b fu_in_exit_p # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l store_fpreg # store the result - -fu_in_exit_p: - - btst &0x5,EXC_SR(%a6) # user or supervisor? - bne.w fu_in_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_in_exit_cont_p: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# the exception occurred in supervisor mode. check to see if the -# addressing mode was (a7)+. if so, we'll need to shift the -# stack frame "up". -fu_in_exit_s_p: - btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+ - beq.b fu_in_exit_cont_p # no - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - -# shift the stack frame "up". we don't really care about the field. - mov.l 0x4(%sp),0x10(%sp) - mov.l 0x0(%sp),0xc(%sp) - add.l &0xc,%sp - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -fu_in_ena_p: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b fu_in_exc_p # at least one was set - -# -# No exceptions occurred that were also enabled. Now: -# -# if (OVFL && ovfl_disabled && inexact_enabled) { -# branch to _real_inex() (even if the result was exact!); -# } else { -# save the result in the proper fp reg (unless the op is fcmp or ftst); -# return; -# } -# - btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set? - beq.w fu_in_cont_p # no - -fu_in_ovflchk_p: - btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled? - beq.w fu_in_cont_p # no - bra.w fu_in_exc_ovfl_p # do _real_inex() now - -# -# An exception occurred and that exception was enabled: -# -# shift enabled exception field into lo byte of d0; -# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) || -# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) { -# /* -# * this is the case where we must call _real_inex() now or else -# * there will be no other way to pass it the exceptional operand -# */ -# call _real_inex(); -# } else { -# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU; -# } -# -fu_in_exc_p: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? (6 or 7) - blt.b fu_in_exc_exit_p # no - -# the enabled exception was inexact - btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur? - bne.w fu_in_exc_unfl_p # yes - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur? - bne.w fu_in_exc_ovfl_p # yes - -# here, we insert the correct fsave status value into the fsave frame for the -# corresponding exception. the operand in the fsave frame should be the original -# src operand. -# as a reminder for future predicted pain and agony, we are passing in fsave the -# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs. -# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!! -fu_in_exc_exit_p: - btst &0x5,EXC_SR(%a6) # user or supervisor? 
- bne.w fu_in_exc_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_in_exc_exit_cont_p: - mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 - - btst &0x7,(%sp) # is trace enabled? - bne.w fu_trace_p # yes - - bra.l _fpsp_done - -tbl_except_p: - short 0xe000,0xe006,0xe004,0xe005 - short 0xe003,0xe002,0xe001,0xe001 - -fu_in_exc_ovfl_p: - mov.w &0x3,%d0 - bra.w fu_in_exc_exit_p - -fu_in_exc_unfl_p: - mov.w &0x4,%d0 - bra.w fu_in_exc_exit_p - -fu_in_exc_exit_s_p: - btst &mia7_bit,SPCOND_FLG(%a6) - beq.b fu_in_exc_exit_cont_p - - mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore src op - - unlk %a6 # unravel stack frame - -# shift stack frame "up". who cares about field. - mov.l 0x4(%sp),0x10(%sp) - mov.l 0x0(%sp),0xc(%sp) - add.l &0xc,%sp - - btst &0x7,(%sp) # is trace on? - bne.b fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# -# The opclass two PACKED instruction that took an "Unimplemented Data Type" -# exception was being traced. Make the "current" PC the FPIAR and put it in the -# trace stack frame then jump to _real_trace(). -# -# UNSUPP FRAME TRACE FRAME -# ***************** ***************** -# * EA * * Current * -# * * * PC * -# ***************** ***************** -# * 0x2 * 0x0dc * * 0x2 * 0x024 * -# ***************** ***************** -# * Next * * Next * -# * PC * * PC * -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -fu_trace_p: - mov.w &0x2024,0x6(%sp) - fmov.l %fpiar,0x8(%sp) - - bra.l _real_trace - -######################################################### -######################################################### -fu_out_pack: - - -# I'm not sure at this point what FPSR bits are valid for this instruction. -# so, since the emulation routines re-create them anyways, zero exception field. -# fmove out doesn't affect ccodes. - and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 - bsr.l load_fpn1 - -# unlike other opclass 3, unimplemented data type exceptions, packed must be -# able to detect all operand types. - lea FP_SRC(%a6),%a0 - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b fu_op2_p # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO - -fu_op2_p: - mov.b %d0,STAG(%a6) # save src optype tag - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - mov.l (%a6),EXC_A6(%a6) # in case a6 changes - bsr.l fout # call fmove out routine - -# Exceptions in order of precedence: -# BSUN : no -# SNAN : yes -# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits)) -# OVFL : no -# UNFL : no -# DZ : no -# INEX2 : yes -# INEX1 : no - -# determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. 
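-
-# the ENABLE/EXCEPT intersection below is the priority scan used all
-# through this handler; in C (bit 7 = BSUN down to bit 0 = INEX1):
-#
-#	unsigned char pend = fpcr_enable & fpsr_except;
-#	int prio = -1;			/* -1: nothing enabled is pending */
-#	for (int b = 7; b >= 0; b--)	/* bfffo: scan from the MSB */
-#		if (pend & (1 << b)) { prio = 7 - b; break; }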
- mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w fu_out_ena_p # some are enabled - -fu_out_exit_p: - mov.l EXC_A6(%a6),(%a6) # restore a6 - - btst &0x5,EXC_SR(%a6) # user or supervisor? - bne.b fu_out_exit_s_p # supervisor - - mov.l EXC_A7(%a6),%a0 # update user a7 - mov.l %a0,%usp - -fu_out_exit_cont_p: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel stack frame - - btst &0x7,(%sp) # is trace on? - bne.w fu_trace_p # yes - - bra.l _fpsp_done # exit to os - -# the exception occurred in supervisor mode. check to see if the -# addressing mode was -(a7). if so, we'll need to shift the -# stack frame "down". -fu_out_exit_s_p: - btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7) - beq.b fu_out_exit_cont_p # no - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - -# now, copy the result to the proper place on the stack - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - btst &0x7,(%sp) - bne.w fu_trace_p - - bra.l _fpsp_done - -fu_out_ena_p: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled - bfffo %d0{&24:&8},%d0 # find highest priority exception - beq.w fu_out_exit_p - - mov.l EXC_A6(%a6),(%a6) # restore a6 - -# an exception occurred and that exception was enabled. -# the only exception possible on packed move out are INEX, OPERR, and SNAN. -fu_out_exc_p: - cmpi.b %d0,&0x1a - bgt.w fu_inex_p2 - beq.w fu_operr_p - -fu_snan_p: - btst &0x5,EXC_SR(%a6) - bne.b fu_snan_s_p - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_snan - -fu_snan_s_p: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_snan - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe006,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_snan - -fu_operr_p: - btst &0x5,EXC_SR(%a6) - bne.w fu_operr_p_s - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_operr - -fu_operr_p_s: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_operr - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. 
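-
-# for reference, the voff/status pairs stuffed on these packed-out paths:
-# SNAN -> voff 0xd8, fsave status 0xe006; OPERR -> 0xd0, 0xe004;
-# INEX -> 0xc4, 0xe001. the "vector offset = 0xd0" remark beside the
-# 0x30d8 store in the SNAN path above looks like a stale copy of the
-# OPERR comment; the value actually written there is the SNAN offset 0xd8.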
- fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0 - mov.w &0xe004,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_operr - -fu_inex_p2: - btst &0x5,EXC_SR(%a6) - bne.w fu_inex_s_p2 - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - bra.w fu_inex - -fu_inex_s_p2: - cmpi.b SPCOND_FLG(%a6),&mda7_flg - bne.w fu_inex - -# the instruction was "fmove.p fpn,-(a7)" from supervisor mode. -# the strategy is to move the exception frame "down" 12 bytes. then, we -# can store the default result where the exception frame was. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4 - mov.w &0xe001,2+FP_SRC(%a6) # set fsave status - - frestore FP_SRC(%a6) # restore src operand - - mov.l (%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - -# now, we copy the default result to its proper location - mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) - mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) - mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - - bra.l _real_inex - -######################################################################### - -# -# if we're stuffing a source operand back into an fsave frame then we -# have to make sure that for single or double source operands that the -# format stuffed is as weird as the hardware usually makes it. -# - global funimp_skew -funimp_skew: - bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier - cmpi.b %d0,&0x1 # was src sgl? - beq.b funimp_skew_sgl # yes - cmpi.b %d0,&0x5 # was src dbl? 
- beq.b funimp_skew_dbl # yes - rts - -funimp_skew_sgl: - mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent - andi.w &0x7fff,%d0 # strip sign - beq.b funimp_skew_sgl_not - cmpi.w %d0,&0x3f80 - bgt.b funimp_skew_sgl_not - neg.w %d0 # make exponent negative - addi.w &0x3f81,%d0 # find amt to shift - mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man) - lsr.l %d0,%d1 # shift it - bset &31,%d1 # set j-bit - mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man) - andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent - ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent -funimp_skew_sgl_not: - rts - -funimp_skew_dbl: - mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent - andi.w &0x7fff,%d0 # strip sign - beq.b funimp_skew_dbl_not - cmpi.w %d0,&0x3c00 - bgt.b funimp_skew_dbl_not - - tst.b FP_SRC_EX(%a6) # make "internal format" - smi.b 0x2+FP_SRC(%a6) - mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign - clr.l %d0 # clear g,r,s - lea FP_SRC(%a6),%a0 # pass ptr to src op - mov.w &0x3c01,%d1 # pass denorm threshold - bsr.l dnrm_lp # denorm it - mov.w &0x3c00,%d0 # new exponent - tst.b 0x2+FP_SRC(%a6) # is sign set? - beq.b fss_dbl_denorm_done # no - bset &15,%d0 # set sign -fss_dbl_denorm_done: - bset &0x7,FP_SRC_HI(%a6) # set j-bit - mov.w %d0,FP_SRC_EX(%a6) # insert new exponent -funimp_skew_dbl_not: - rts - -######################################################################### - global _mem_write2 -_mem_write2: - btst &0x5,EXC_SR(%a6) - beq.l _dmem_write - mov.l 0x0(%a0),FP_DST_EX(%a6) - mov.l 0x4(%a0),FP_DST_HI(%a6) - mov.l 0x8(%a0),FP_DST_LO(%a6) - clr.l %d1 - rts - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented # -# effective address" exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Unimplemented Effective Address exception in an operating # -# system. 
#
-# #
-# XREF **************************************************************** #
-# _imem_read_long() - read instruction longword #
-# fix_skewed_ops() - adjust src operand in fsave frame #
-# set_tag_x() - determine optype of src/dst operands #
-# store_fpreg() - store opclass 0 or 2 result to FP regfile #
-# unnorm_fix() - change UNNORM operands to NORM or ZERO #
-# load_fpn2() - load dst operand from FP regfile #
-# tbl_unsupp - address of table of emulation routines for opclass 0,2 #
-# decbin() - convert packed data to FP binary data #
-# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
-# _real_access() - "callout" for access error exception #
-# _mem_read() - read extended immediate operand from memory #
-# _fpsp_done() - "callout" for exit; work all done #
-# _real_trace() - "callout" for Trace enabled exception #
-# fmovm_dynamic() - emulate dynamic fmovm instruction #
-# fmovm_ctrl() - emulate fmovm control instruction #
-# #
-# INPUT *************************************************************** #
-# - The system stack contains the "Unimplemented <ea>" stk frame #
-# #
-# OUTPUT ************************************************************** #
-# If access error: #
-# - The system stack is changed to an access error stack frame #
-# If FPU disabled: #
-# - The system stack is changed to an FPU disabled stack frame #
-# If Trace exception enabled: #
-# - The system stack is changed to a Trace exception stack frame #
-# Else: (normal case) #
-# - None (correct result has been stored as appropriate) #
-# #
-# ALGORITHM *********************************************************** #
-# This exception handles 3 types of operations: #
-# (1) FP Instructions using extended precision or packed immediate #
-# addressing mode. #
-# (2) The "fmovm.x" instruction w/ dynamic register specification. #
-# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
-# #
-# For immediate data operations, the data is read in w/ a #
-# _mem_read() "callout", converted to FP binary (if packed), and used #
-# as the source operand to the instruction specified by the instruction #
-# word. If no FP exception should be reported as a result of the #
-# emulation, then the result is stored to the destination register and #
-# the handler exits through _fpsp_done(). If an enabled exc has been #
-# signalled as a result of emulation, then an fsave state frame #
-# corresponding to the FP exception type must be entered into the 060 #
-# FPU before exiting. In either the enabled or disabled cases, we #
-# must also check if a Trace exception is pending, in which case, we #
-# must create a Trace exception stack frame from the current exception #
-# stack frame. If no Trace is pending, we simply exit through #
-# _fpsp_done(). #
-# For "fmovm.x", call the routine fmovm_dynamic() which will #
-# decode and emulate the instruction. No FP exceptions can be pending #
-# as a result of this operation emulation. A Trace exception can be #
-# pending, though, which means the current stack frame must be changed #
-# to a Trace stack frame and an exit made through _real_trace(). #
-# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
-# was executed from supervisor mode, this handler must store the FP #
-# register file values to the system stack by itself since #
-# fmovm_dynamic() can't handle this. A normal exit is made through #
-# _fpsp_done(). #
-# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.
# -# Again, a Trace exception may be pending and an exit made through # -# _real_trace(). Else, a normal exit is made through _fpsp_done(). # -# # -# Before any of the above is attempted, it must be checked to # -# see if the FPU is disabled. Since the "Unimp " exception is taken # -# before the "FPU disabled" exception, but the "FPU disabled" exception # -# has higher priority, we check the disabled bit in the PCR. If set, # -# then we must create an 8 word "FPU disabled" exception stack frame # -# from the current 4 word exception stack frame. This includes # -# reproducing the effective address of the instruction to put on the # -# new stack frame. # -# # -# In the process of all emulation work, if a _mem_read() # -# "callout" returns a failing result indicating an access error, then # -# we must create an access error stack frame from the current stack # -# frame. This information includes a faulting address and a fault- # -# status-longword. These are created within this handler. # -# # -######################################################################### - - global _fpsp_effadd -_fpsp_effadd: - -# This exception type takes priority over the "Line F Emulator" -# exception. Therefore, the FPU could be disabled when entering here. -# So, we must check to see if it's disabled and handle that case separately. - mov.l %d0,-(%sp) # save d0 - movc %pcr,%d0 # load proc cr - btst &0x1,%d0 # is FPU disabled? - bne.w iea_disabled # yes - mov.l (%sp)+,%d0 # restore d0 - - link %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# PC of instruction that took the exception is the PC in the frame - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - -######################################################################### - - tst.w %d0 # is operation fmovem? - bmi.w iea_fmovm # yes - -# -# here, we will have: -# fabs fdabs fsabs facos fmod -# fadd fdadd fsadd fasin frem -# fcmp fatan fscale -# fdiv fddiv fsdiv fatanh fsin -# fint fcos fsincos -# fintrz fcosh fsinh -# fmove fdmove fsmove fetox ftan -# fmul fdmul fsmul fetoxm1 ftanh -# fneg fdneg fsneg fgetexp ftentox -# fsgldiv fgetman ftwotox -# fsglmul flog10 -# fsqrt flog2 -# fsub fdsub fssub flogn -# ftst flognp1 -# which can all use f.{x,p} -# so, now it's immediate data extended precision AND PACKED FORMAT! -# -iea_op: - andi.l &0x00ff00ff,USER_FPSR(%a6) - - btst &0xa,%d0 # is src fmt x or p? - bne.b iea_op_pack # packed - - - mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to # - lea FP_SRC(%a6),%a1 # pass: ptr to super addr - mov.l &0xc,%d0 # pass: 12 bytes - bsr.l _imem_read # read extended immediate - - tst.l %d1 # did ifetch fail? - bne.w iea_iacc # yes - - bra.b iea_op_setsrc - -iea_op_pack: - - mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to # - lea FP_SRC(%a6),%a1 # pass: ptr to super dst - mov.l &0xc,%d0 # pass: 12 bytes - bsr.l _imem_read # read packed operand - - tst.l %d1 # did ifetch fail? - bne.w iea_iacc # yes - -# The packed operand is an INF or a NAN if the exponent field is all ones. - bfextu FP_SRC(%a6){&1:&15},%d0 # get exp - cmpi.w %d0,&0x7fff # INF or NAN? 
- beq.b iea_op_setsrc # operand is an INF or NAN - -# The packed operand is a zero if the mantissa is all zero, else it's -# a normal packed op. - mov.b 3+FP_SRC(%a6),%d0 # get byte 4 - andi.b &0x0f,%d0 # clear all but last nybble - bne.b iea_op_gp_not_spec # not a zero - tst.l FP_SRC_HI(%a6) # is lw 2 zero? - bne.b iea_op_gp_not_spec # not a zero - tst.l FP_SRC_LO(%a6) # is lw 3 zero? - beq.b iea_op_setsrc # operand is a ZERO -iea_op_gp_not_spec: - lea FP_SRC(%a6),%a0 # pass: ptr to packed op - bsr.l decbin # convert to extended - fmovm.x &0x80,FP_SRC(%a6) # make this the srcop - -iea_op_setsrc: - addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer - -# FP_SRC now holds the src operand. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - mov.b %d0,STAG(%a6) # could be ANYTHING!!! - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b iea_op_getdst # no - bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO - mov.b %d0,STAG(%a6) # set new optype tag -iea_op_getdst: - clr.b STORE_FLG(%a6) # clear "store result" boolean - - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b iea_op_extract # monadic - btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp? - bne.b iea_op_spec # yes - -iea_op_loaddst: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno - bsr.l load_fpn2 # load dst operand - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - mov.b %d0,DTAG(%a6) # could be ANYTHING!!! - cmpi.b %d0,&UNNORM # is operand an UNNORM? - bne.b iea_op_extract # no - bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO - mov.b %d0,DTAG(%a6) # set new optype tag - bra.b iea_op_extract - -# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic -iea_op_spec: - btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos? - beq.b iea_op_extract # yes -# now, we're left with ftst and fcmp. so, first let's tag them so that they don't -# store a result. then, only fcmp will branch back and pick up a dst operand. - st STORE_FLG(%a6) # don't store a final result - btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp? - beq.b iea_op_loaddst # yes - -iea_op_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x007f,%d1 # extract extension - - fmov.l &0x0,%fpcr - fmov.l &0x0,%fpsr - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# -# Exceptions in order of precedence: -# BSUN : none -# SNAN : all operations -# OPERR : all reg-reg or mem-reg operations that can normally operr -# OVFL : same as OPERR -# UNFL : same as OPERR -# DZ : same as OPERR -# INEX2 : same as OPERR -# INEX1 : all packed immediate operations -# - -# we determine the highest priority exception(if any) set by the -# emulation routine that has also been enabled by the user. - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.b iea_op_ena # some are enabled - -# now, we save the result, unless, of course, the operation was ftst or fcmp. -# these don't save results. -iea_op_save: - tst.b STORE_FLG(%a6) # does this op store a result? 
- bne.b iea_op_exit1 # exit with no frestore - -iea_op_store: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno - bsr.l store_fpreg # store the result - -iea_op_exit1: - mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 # unravel the frame - - btst &0x7,(%sp) # is trace on? - bne.w iea_op_trace # yes - - bra.l _fpsp_done # exit to os - -iea_op_ena: - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b iea_op_exc # at least one was set - -# no exception occurred. now, did a disabled, exact overflow occur with inexact -# enabled? if so, then we have to stuff an overflow frame into the FPU. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - beq.b iea_op_save - -iea_op_ovfl: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - beq.b iea_op_store # no - bra.b iea_op_exc_ovfl # yes - -# an enabled exception occurred. we have to insert the exception type back into -# the machine. -iea_op_exc: - subi.l &24,%d0 # fix offset to be 0-8 - cmpi.b %d0,&0x6 # is exception INEX? - bne.b iea_op_exc_force # no - -# the enabled exception was inexact. so, if it occurs with an overflow -# or underflow that was disabled, then we have to force an overflow or -# underflow frame. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - bne.b iea_op_exc_ovfl # yes - btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? - bne.b iea_op_exc_unfl # yes - -iea_op_exc_force: - mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) - bra.b iea_op_exit2 # exit with frestore - -tbl_iea_except: - short 0xe002, 0xe006, 0xe004, 0xe005 - short 0xe003, 0xe002, 0xe001, 0xe001 - -iea_op_exc_ovfl: - mov.w &0xe005,2+FP_SRC(%a6) - bra.b iea_op_exit2 - -iea_op_exc_unfl: - mov.w &0xe003,2+FP_SRC(%a6) - -iea_op_exit2: - mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC" - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame - - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # restore exceptional state - - unlk %a6 # unravel the frame - - btst &0x7,(%sp) # is trace on? - bne.b iea_op_trace # yes - - bra.l _fpsp_done # exit to os - -# -# The opclass two instruction that took an "Unimplemented Effective Address" -# exception was being traced. Make the "current" PC the FPIAR and put it in -# the trace stack frame then jump to _real_trace(). 
-# -# UNIMP EA FRAME TRACE FRAME -# ***************** ***************** -# * 0x0 * 0x0f0 * * Current * -# ***************** * PC * -# * Current * ***************** -# * PC * * 0x2 * 0x024 * -# ***************** ***************** -# * SR * * Next * -# ***************** * PC * -# ***************** -# * SR * -# ***************** -iea_op_trace: - mov.l (%sp),-(%sp) # shift stack frame "down" - mov.w 0x8(%sp),0x4(%sp) - mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 - fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR - - bra.l _real_trace - -######################################################################### -iea_fmovm: - btst &14,%d0 # ctrl or data reg - beq.w iea_fmovm_ctrl - -iea_fmovm_data: - - btst &0x5,EXC_SR(%a6) # user or supervisor mode - bne.b iea_fmovm_data_s - -iea_fmovm_data_u: - mov.l %usp,%a0 - mov.l %a0,EXC_A7(%a6) # store current a7 - bsr.l fmovm_dynamic # do dynamic fmovm - mov.l EXC_A7(%a6),%a0 # load possibly new a7 - mov.l %a0,%usp # update usp - bra.w iea_fmovm_exit - -iea_fmovm_data_s: - clr.b SPCOND_FLG(%a6) - lea 0x2+EXC_VOFF(%a6),%a0 - mov.l %a0,EXC_A7(%a6) - bsr.l fmovm_dynamic # do dynamic fmovm - - cmpi.b SPCOND_FLG(%a6),&mda7_flg - beq.w iea_fmovm_data_predec - cmpi.b SPCOND_FLG(%a6),&mia7_flg - bne.w iea_fmovm_exit - -# right now, d0 = the size. -# the data has been fetched from the supervisor stack, but we have not -# incremented the stack pointer by the appropriate number of bytes. -# do it here. -iea_fmovm_data_postinc: - btst &0x7,EXC_SR(%a6) - bne.b iea_fmovm_data_pi_trace - - mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) - mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) - mov.w &0x00f0,(EXC_VOFF,%a6,%d0) - - lea (EXC_SR,%a6,%d0),%a0 - mov.l %a0,EXC_SR(%a6) - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - mov.l (%sp)+,%sp - bra.l _fpsp_done - -iea_fmovm_data_pi_trace: - mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) - mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) - mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) - mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) - - lea (EXC_SR-0x4,%a6,%d0),%a0 - mov.l %a0,EXC_SR(%a6) - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - mov.l (%sp)+,%sp - bra.l _real_trace - -# right now, d1 = size and d0 = the strg. -iea_fmovm_data_predec: - mov.b %d1,EXC_VOFF(%a6) # store strg - mov.b %d0,0x1+EXC_VOFF(%a6) # store size - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - mov.l (%a6),-(%sp) # make a copy of a6 - mov.l %d0,-(%sp) # save d0 - mov.l %d1,-(%sp) # save d1 - mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC - - clr.l %d0 - mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size - neg.l %d0 # get negative of size - - btst &0x7,EXC_SR(%a6) # is trace enabled? 
- beq.b iea_fmovm_data_p2 - - mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) - mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) - mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) - mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) - - pea (%a6,%d0) # create final sp - bra.b iea_fmovm_data_p3 - -iea_fmovm_data_p2: - mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) - mov.l (%sp)+,(EXC_PC,%a6,%d0) - mov.w &0x00f0,(EXC_VOFF,%a6,%d0) - - pea (0x4,%a6,%d0) # create final sp - -iea_fmovm_data_p3: - clr.l %d1 - mov.b EXC_VOFF(%a6),%d1 # fetch strg - - tst.b %d1 - bpl.b fm_1 - fmovm.x &0x80,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_1: - lsl.b &0x1,%d1 - bpl.b fm_2 - fmovm.x &0x40,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_2: - lsl.b &0x1,%d1 - bpl.b fm_3 - fmovm.x &0x20,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_3: - lsl.b &0x1,%d1 - bpl.b fm_4 - fmovm.x &0x10,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_4: - lsl.b &0x1,%d1 - bpl.b fm_5 - fmovm.x &0x08,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_5: - lsl.b &0x1,%d1 - bpl.b fm_6 - fmovm.x &0x04,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_6: - lsl.b &0x1,%d1 - bpl.b fm_7 - fmovm.x &0x02,(0x4+0x8,%a6,%d0) - addi.l &0xc,%d0 -fm_7: - lsl.b &0x1,%d1 - bpl.b fm_end - fmovm.x &0x01,(0x4+0x8,%a6,%d0) -fm_end: - mov.l 0x4(%sp),%d1 - mov.l 0x8(%sp),%d0 - mov.l 0xc(%sp),%a6 - mov.l (%sp)+,%sp - - btst &0x7,(%sp) # is trace enabled? - beq.l _fpsp_done - bra.l _real_trace - -######################################################################### -iea_fmovm_ctrl: - - bsr.l fmovm_ctrl # load ctrl regs - -iea_fmovm_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - btst &0x7,EXC_SR(%a6) # is trace on? - bne.b iea_fmovm_trace # yes - - mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC - - unlk %a6 # unravel the frame - - bra.l _fpsp_done # exit to os - -# -# The control reg instruction that took an "Unimplemented Effective Address" -# exception was being traced. The "Current PC" for the trace frame is the -# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR. -# After fixing the stack frame, jump to _real_trace(). -# -# UNIMP EA FRAME TRACE FRAME -# ***************** ***************** -# * 0x0 * 0x0f0 * * Current * -# ***************** * PC * -# * Current * ***************** -# * PC * * 0x2 * 0x024 * -# ***************** ***************** -# * SR * * Next * -# ***************** * PC * -# ***************** -# * SR * -# ***************** -# this ain't a pretty solution, but it works: -# -restore a6 (not with unlk) -# -shift stack frame down over where old a6 used to be -# -add LOCAL_SIZE to stack pointer -iea_fmovm_trace: - mov.l (%a6),%a6 # restore frame pointer - mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) - mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) - mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) - mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024 - add.l &LOCAL_SIZE,%sp # clear stack frame - - bra.l _real_trace - -######################################################################### -# The FPU is disabled and so we should really have taken the "Line -# F Emulator" exception. So, here we create an 8-word stack frame -# from our 4-word stack frame. This means we must calculate the length -# of the faulting instruction to get the "next PC". This is trivial for -# immediate operands but requires some extra work for fmovm dynamic -# which can use most addressing modes. 
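The length computation that follows is compact but easy to misread, so here is a rough C model of the same decision tree. This sketch is not part of the original package: the names fline_insn_len and fmovm_dynamic_len are hypothetical, and insn is assumed to hold the opword in its upper 16 bits and the extension word in its lower 16 bits, exactly as _imem_read_long leaves them in %d0.

#include <stdint.h>

/* Hypothetical stand-in for the length by-product of fmovm_calc_ea(). */
extern uint32_t fmovm_dynamic_len(uint32_t insn);

/* insn = (opword << 16) | extension word */
static uint32_t fline_insn_len(uint32_t insn)
{
	uint16_t ext = (uint16_t)insn;

	if (!(ext & 0x8000))		/* tst.w %d0; bpl -> not an fmovm */
		return 0x10;		/* ext-prec immediate: 16 bytes total */

	if (ext & 0x4000)		/* btst &0xe set -> fmovm.x dynamic */
		return fmovm_dynamic_len(insn);

	/* fmovm.l of control registers: 12 bytes, 16 if all three move */
	return (((ext >> 10) & 0x7) == 0x7) ? 0x10 : 0xc;
}

The dynamic fmovm.x case cannot be computed from the words alone, which is why the assembly goes through the full effective-address calculation and subtracts the two instruction pointers afterward.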
-iea_disabled: - mov.l (%sp)+,%d0 # restore d0 - - link %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - -# PC of instruction that took the exception is the PC in the frame - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD - - tst.w %d0 # is instr fmovm? - bmi.b iea_dis_fmovm # yes -# instruction is using an extended precision immediate operand. Therefore, -# the total instruction length is 16 bytes. -iea_dis_immed: - mov.l &0x10,%d0 # 16 bytes of instruction - bra.b iea_dis_cont -iea_dis_fmovm: - btst &0xe,%d0 # is instr fmovm ctrl - bne.b iea_dis_fmovm_data # no -# the instruction is a fmovm.l with 2 or 3 registers. - bfextu %d0{&19:&3},%d1 - mov.l &0xc,%d0 - cmpi.b %d1,&0x7 # move all regs? - bne.b iea_dis_cont - addq.l &0x4,%d0 - bra.b iea_dis_cont -# the instruction is an fmovm.x dynamic which can use many addressing -# modes and thus can have several different total instruction lengths. -# call fmovm_calc_ea which will go through the ea calc process and, -# as a by-product, will tell us how long the instruction is. -iea_dis_fmovm_data: - clr.l %d0 - bsr.l fmovm_calc_ea - mov.l EXC_EXTWPTR(%a6),%d0 - sub.l EXC_PC(%a6),%d0 -iea_dis_cont: - mov.w %d0,EXC_VOFF(%a6) # store stack shift value - - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - -# here, we actually create the 8-word frame from the 4-word frame, -# with the "next PC" as additional info. -# the <ea> field is left as undefined. - subq.l &0x8,%sp # make room for new stack - mov.l %d0,-(%sp) # save d0 - mov.w 0xc(%sp),0x4(%sp) # move SR - mov.l 0xe(%sp),0x6(%sp) # move Current PC - clr.l %d0 - mov.w 0x12(%sp),%d0 - mov.l 0x6(%sp),0x10(%sp) # move Current PC - add.l %d0,0x6(%sp) # make Next PC - mov.w &0x402c,0xa(%sp) # insert offset,frame format - mov.l (%sp)+,%d0 # restore d0 - - bra.l _real_fpu_disabled - -########## - -iea_iacc: - movc %pcr,%d0 - btst &0x1,%d0 - bne.b iea_iacc_cont - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack -iea_iacc_cont: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - subq.w &0x8,%sp # make stack frame bigger - mov.l 0x8(%sp),(%sp) # store SR,hi(PC) - mov.w 0xc(%sp),0x4(%sp) # store lo(PC) - mov.w &0x4008,0x6(%sp) # store voff - mov.l 0x2(%sp),0x8(%sp) # store ea - mov.l &0x09428001,0xc(%sp) # store fslw - -iea_acc_done: - btst &0x5,(%sp) # user or supervisor mode? 
- beq.b iea_acc_done2 # user - bset &0x2,0xd(%sp) # set supervisor TM bit - -iea_acc_done2: - bra.l _real_access - -iea_dacc: - lea -LOCAL_SIZE(%a6),%sp - - movc %pcr,%d1 - btst &0x1,%d1 - bne.b iea_dacc_cont - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack - fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs -iea_dacc_cont: - mov.l (%a6),%a6 - - mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) - mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) - mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) - mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) - mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) - mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) - - movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1 - add.w &LOCAL_SIZE-0x4,%sp - - bra.b iea_acc_done - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_operr(): 060FPSP entry point for FP Operr exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Operand Error exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# _real_operr() - "callout" to operating system operr handler # -# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # -# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # -# facc_out_{b,w,l}() - store to memory took access error (opcl 3) # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP Operr exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# No access error: # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP Operr exception is enabled, the goal # -# is to get to the handler specified at _real_operr(). But, on the 060, # -# for opclass zero and two instructions taking this exception, the # -# input operand in the fsave frame may be incorrect for some cases # -# and needs to be corrected. This handler calls fix_skewed_ops() to # -# do just this and then exits through _real_operr(). # -# For opclass 3 instructions, the 060 doesn't store the default # -# operr result out to memory or data register file as it should. # -# This code must emulate the move out before finally exiting through # -# _real_operr(). The move out, if to memory, is performed using # -# _mem_write() "callout" routines that may return a failing result. # -# In this special case, the handler must exit through facc_out() # -# which creates an access error stack frame from the current operr # -# stack frame. 
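As a reading aid for the opclass-3 move-out emulated below (foperr_out), here is a hedged C model of how the default operr result is chosen: a NAN source passes its upper mantissa longword through, while anything else yields the largest positive or "largest negative" integer depending on the source sign. The function name and the src_ex/src_hi/src_lo parameters are hypothetical stand-ins for the FP_SRC_* fields of the fsave frame.

#include <stdint.h>

static uint32_t operr_default_result(uint16_t src_ex, uint32_t src_hi,
				     uint32_t src_lo)
{
	/* exponent all ones with a non-zero mantissa is a NAN: pass the
	 * upper mantissa longword through as the stored result */
	if ((src_ex & 0x7fff) == 0x7fff &&
	    (src_lo != 0 || (src_hi & 0x7fffffff) != 0))
		return src_hi;

	/* otherwise the maximum positive integer, or 0x7fffffff + 1 =
	 * 0x80000000 when the source sign bit is set */
	return (src_ex & 0x8000) ? 0x80000000u : 0x7fffffffu;
}

For byte and word destinations, the handler then stores only the low byte or word of this longword, which is why foperr_out_{b,w} load L_SCR1 with the matching operand size.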
# -# # -######################################################################### - - global _fpsp_operr -_fpsp_operr: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.b foperr_out # fmove out - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source infinity or -# denorm operand in the sgl or dbl format. NANs also become skewed, but can't -# cause an operr so we don't need to check for them here. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -foperr_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_operr - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# operand error exceptions. we do this here before passing control to -# the user operand error handler. -# -# byte, word, and long destination format operations can pass -# through here. we simply need to test the sign of the src -# operand and save the appropriate minimum or maximum integer value -# to the effective address as pointed to by the stacked effective address. -# -# although packed opclass three operations can take operand error -# exceptions, they won't pass through here since they are caught -# first by the unsupported data format exception handler. that handler -# sends them directly to _real_operr() if necessary. -# -foperr_out: - - mov.w FP_SRC_EX(%a6),%d1 # fetch exponent - andi.w &0x7fff,%d1 - cmpi.w %d1,&0x7fff - bne.b foperr_out_not_qnan -# the operand is either an infinity or a QNAN. - tst.l FP_SRC_LO(%a6) - bne.b foperr_out_qnan - mov.l FP_SRC_HI(%a6),%d1 - andi.l &0x7fffffff,%d1 - beq.b foperr_out_not_qnan -foperr_out_qnan: - mov.l FP_SRC_HI(%a6),L_SCR1(%a6) - bra.b foperr_out_jmp - -foperr_out_not_qnan: - mov.l &0x7fffffff,%d1 - tst.b FP_SRC_EX(%a6) - bpl.b foperr_out_not_qnan2 - addq.l &0x1,%d1 -foperr_out_not_qnan2: - mov.l %d1,L_SCR1(%a6) - -foperr_out_jmp: - bfextu %d0{&19:&3},%d0 # extract dst format field - mov.b 1+EXC_OPWORD(%a6),%d1 # extract mode,reg - mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 - jmp (tbl_operr.b,%pc,%a0) - -tbl_operr: - short foperr_out_l - tbl_operr # long word integer - short tbl_operr - tbl_operr # sgl prec shouldn't happen - short tbl_operr - tbl_operr # ext prec shouldn't happen - short foperr_exit - tbl_operr # packed won't enter here - short foperr_out_w - tbl_operr # word integer - short tbl_operr - tbl_operr # dbl prec shouldn't happen - short foperr_out_b - tbl_operr # byte integer - short tbl_operr - tbl_operr # packed won't enter here - -foperr_out_b: - mov.b L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? 
- ble.b foperr_out_b_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_byte # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_b # yes - - bra.w foperr_exit -foperr_out_b_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_b # store result to regfile - bra.w foperr_exit - -foperr_out_w: - mov.w L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b foperr_out_w_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_word # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_w # yes - - bra.w foperr_exit -foperr_out_w_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_w # store result to regfile - bra.w foperr_exit - -foperr_out_l: - mov.l L_SCR1(%a6),%d0 # load positive default result - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b foperr_out_l_save_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w foperr_exit -foperr_out_l_save_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w foperr_exit - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Signalling NAN exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# _real_snan() - "callout" to operating system SNAN handler # -# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) # -# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) # -# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) # -# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP SNAN exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# No access error: # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP SNAN exception is enabled, the goal # -# is to get to the handler specified at _real_snan(). But, on the 060, # -# for opclass zero and two instructions taking this exception, the # -# input operand in the fsave frame may be incorrect for some cases # -# and needs to be corrected. This handler calls fix_skewed_ops() to # -# do just this and then exits through _real_snan(). # -# For opclass 3 instructions, the 060 doesn't store the default # -# SNAN result out to memory or data register file as it should. # -# This code must emulate the move out before finally exiting through # -# _real_snan(). The move out, if to memory, is performed using # -# _mem_write() "callout" routines that may return a failing result. # -# In this special case, the handler must exit through facc_out() # -# which creates an access error stack frame from the current SNAN # -# stack frame. 
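For the analogous SNAN move-out handled below (fsnan_out), the default result per destination format can be summarized with a small C model. These helpers are illustrative only; they mirror the bset &6/&14/&30 and sign/exponent merges performed by fsnan_out_{b,w,l,s}, with src_hi and src_ex_hi standing in for the FP_SRC_HI mantissa longword and the FP_SRC_EX longword (sign in the top bit).

#include <stdint.h>

static uint8_t snan_default_b(uint32_t src_hi)
{
	return (uint8_t)(src_hi >> 24) | 0x40;		/* bset &6 */
}

static uint16_t snan_default_w(uint32_t src_hi)
{
	return (uint16_t)(src_hi >> 16) | 0x4000;	/* bset &14 */
}

static uint32_t snan_default_l(uint32_t src_hi)
{
	return src_hi | 0x40000000u;			/* bset &30 */
}

static uint32_t snan_default_s(uint32_t src_ex_hi, uint32_t src_hi)
{
	/* keep the sign, force a single-precision exponent plus the quiet
	 * bit, then fold in the mantissa shifted for single precision */
	return (src_ex_hi & 0x80000000u) | 0x7fc00000u | (src_hi >> 8);
}

In every case the transformation quiets the NAN (sets the most significant mantissa bit of the destination format) rather than synthesizing a min/max integer as the operr path does.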
# -# For the case of an extended precision opclass 3 instruction, # -# if the effective addressing mode was -() or ()+, then the address # -# register must get updated by calling _calc_ea_fout(). If the <ea> # -# was -(a7) from supervisor mode, then the exception frame currently # -# on the system stack must be carefully moved "down" to make room # -# for the operand being moved. # -# # -######################################################################### - - global _fpsp_snan -_fpsp_snan: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.w fsnan_out # fmove out - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source infinity or -# denorm operand in the sgl or dbl format. NANs also become skewed and must be -# fixed here. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -fsnan_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_snan - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# snan exceptions. we do this here before passing control to -# the user snan handler. -# -# byte, word, long, and packed destination format operations can pass -# through here. since packed format operations already were handled by -# fpsp_unsupp(), then we need to do nothing else for them here. -# for byte, word, and long, we simply need to set the SNAN's quiet bit -# and save the appropriate piece of the upper mantissa -# to the effective address as pointed to by the stacked effective address. -# -fsnan_out: - - bfextu %d0{&19:&3},%d0 # extract dst format field - mov.b 1+EXC_OPWORD(%a6),%d1 # extract mode,reg - mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 - jmp (tbl_snan.b,%pc,%a0) - -tbl_snan: - short fsnan_out_l - tbl_snan # long word integer - short fsnan_out_s - tbl_snan # sgl prec - short fsnan_out_x - tbl_snan # ext prec - short tbl_snan - tbl_snan # packed needs no help - short fsnan_out_w - tbl_snan # word integer - short fsnan_out_d - tbl_snan # dbl prec - short fsnan_out_b - tbl_snan # byte integer - short tbl_snan - tbl_snan # packed needs no help - -fsnan_out_b: - mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN - bset &6,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_b_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_byte # write the default result - - tst.l %d1 # did dstore fail? 
- bne.l facc_out_b # yes - - bra.w fsnan_exit -fsnan_out_b_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_b # store result to regfile - bra.w fsnan_exit - -fsnan_out_w: - mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN - bset &14,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_w_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_word # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_w # yes - - bra.w fsnan_exit -fsnan_out_w_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_w # store result to regfile - bra.w fsnan_exit - -fsnan_out_l: - mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN - bset &30,%d0 # set SNAN bit - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_l_dn # yes - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w fsnan_exit -fsnan_out_l_dn: - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w fsnan_exit - -fsnan_out_s: - cmpi.b %d1,&0x7 # is mode a data reg? - ble.b fsnan_out_d_dn # yes - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit - mov.l FP_SRC_HI(%a6),%d1 # load mantissa - lsr.l &0x8,%d1 # shift mantissa for sgl - or.l %d1,%d0 # create sgl SNAN - mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result - bsr.l _dmem_write_long # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_l # yes - - bra.w fsnan_exit -fsnan_out_d_dn: - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit - mov.l %d1,-(%sp) - mov.l FP_SRC_HI(%a6),%d1 # load mantissa - lsr.l &0x8,%d1 # shift mantissa for sgl - or.l %d1,%d0 # create sgl SNAN - mov.l (%sp)+,%d1 - andi.w &0x0007,%d1 - bsr.l store_dreg_l # store result to regfile - bra.w fsnan_exit - -fsnan_out_d: - mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign - andi.l &0x80000000,%d0 # keep sign - ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit - mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa - mov.l %d0,FP_SCR0_EX(%a6) # store to temp space - mov.l &11,%d0 # load shift amt - lsr.l %d0,%d1 - or.l %d1,FP_SCR0_EX(%a6) # create dbl hi - mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa - andi.l &0x000007ff,%d1 - ror.l %d0,%d1 - mov.l %d1,FP_SCR0_HI(%a6) # store to temp space - mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa - lsr.l %d0,%d1 - or.l %d1,FP_SCR0_HI(%a6) # create dbl lo - lea FP_SCR0(%a6),%a0 # pass: ptr to operand - mov.l EXC_EA(%a6),%a1 # pass: dst addr - movq.l &0x8,%d0 # pass: size of 8 bytes - bsr.l _dmem_write # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_d # yes - - bra.w fsnan_exit - -# for extended precision, if the addressing mode is pre-decrement or -# post-increment, then the address register did not get updated. -# in addition, for pre-decrement, the stacked <ea> is incorrect. -fsnan_out_x: - clr.b SPCOND_FLG(%a6) # clear special case flag - - mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) - clr.w 2+FP_SCR0(%a6) - mov.l FP_SRC_HI(%a6),%d0 - bset &30,%d0 - mov.l %d0,FP_SCR0_HI(%a6) - mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) - - btst &0x5,EXC_SR(%a6) # supervisor mode exception? 
- bne.b fsnan_out_x_s # yes - - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea() - mov.l (%a6),EXC_A6(%a6) - - bsr.l _calc_ea_fout # find the correct ea,update An - mov.l %a0,%a1 - mov.l %a0,EXC_EA(%a6) # stack correct <ea> - - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp # restore user stack pointer - mov.l EXC_A6(%a6),(%a6) - -fsnan_out_x_save: - lea FP_SCR0(%a6),%a0 # pass: ptr to operand - movq.l &0xc,%d0 # pass: size of extended - bsr.l _dmem_write # write the default result - - tst.l %d1 # did dstore fail? - bne.l facc_out_x # yes - - bra.w fsnan_exit - -fsnan_out_x_s: - mov.l (%a6),EXC_A6(%a6) - - bsr.l _calc_ea_fout # find the correct ea,update An - mov.l %a0,%a1 - mov.l %a0,EXC_EA(%a6) # stack correct <ea> - - mov.l EXC_A6(%a6),(%a6) - - cmpi.b SPCOND_FLG(%a6),&mda7_flg # is mode -(a7)? - bne.b fsnan_out_x_save # no - -# the operation was "fmove.x SNAN,-(a7)" from supervisor mode. - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - mov.l EXC_A6(%a6),%a6 # restore frame pointer - - mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) - mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) - mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) - - mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) - mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) - mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) - - add.l &LOCAL_SIZE-0x8,%sp - - bra.l _real_snan - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Inexact exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword # -# fix_skewed_ops() - adjust src operand in fsave frame # -# set_tag_x() - determine optype of src/dst operands # -# store_fpreg() - store opclass 0 or 2 result to FP regfile # -# unnorm_fix() - change UNNORM operands to NORM or ZERO # -# load_fpn2() - load dst operand from FP regfile # -# smovcr() - emulate an "fmovcr" instruction # -# fout() - emulate an opclass 3 instruction # -# tbl_unsupp - addr of table of emulation routines for opclass 0,2 # -# _real_inex() - "callout" to operating system inexact handler # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP Inexact exception frame # -# - The fsave frame contains the source operand # -# # -# OUTPUT ************************************************************** # -# - The system stack is unchanged # -# - The fsave frame contains the adjusted src op for opclass 0,2 # -# # -# ALGORITHM *********************************************************** # -# In a system where the FP Inexact exception is enabled, the goal # -# is to get to the handler specified at _real_inex(). But, on the 060, # -# for opclass zero and two instruction taking this exception, the # -# hardware doesn't store the correct result to the destination FP # -# register as did the '040 and '881/2. This handler must emulate the # -# instruction in order to get this value and then store it to the # -# correct register before calling _real_inex(). 
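Before the opclass-3 discussion continues, a rough C model of the emulation dispatch used below (finex_extract) may help: bit 5 of the low command byte marks a dyadic operation, except when bit 4 also flags fsincos, and dyadic ops load the destination register before the 7-bit extension indexes the emulation table. All names here (fpx_t, load_dst_fpreg, tbl_unsupp_c) are hypothetical analogues of the assembly-level structures, not the package's actual interfaces.

#include <stdint.h>

typedef struct { uint16_t ex; uint32_t hi, lo; } fpx_t;	/* ext-prec operand */
typedef void (*fp_emul_fn)(fpx_t *src, fpx_t *dst, int rnd_mode);

extern fp_emul_fn tbl_unsupp_c[0x80];		   /* analogue of tbl_unsupp */
extern void load_dst_fpreg(int regno, fpx_t *out); /* analogue of load_fpn2 */

static void inex_emulate(uint16_t cmdreg, fpx_t *src, int rnd_mode)
{
	fpx_t dst = { 0, 0, 0 };

	/* bit 5 of the low command byte set => dyadic op; bit 4 set on
	 * top of that => fsincos, which needs no destination operand */
	if ((cmdreg & 0x20) && !(cmdreg & 0x10))
		load_dst_fpreg((cmdreg >> 7) & 0x7, &dst); /* dst reg field */

	/* the low 7 bits of the command word index the emulation table */
	tbl_unsupp_c[cmdreg & 0x7f](src, &dst, rnd_mode);
}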
# -# For opclass 3 instructions, the 060 doesn't store the default # -# inexact result out to memory or data register file as it should. # -# This code must emulate the move out by calling fout() before finally # -# exiting through _real_inex(). # -# # -######################################################################### - - global _fpsp_inex -_fpsp_inex: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - btst &13,%d0 # is instr an fmove out? - bne.w finex_out # fmove out - - -# the hardware, for "fabs" and "fneg" w/ a long source format, puts the -# longword integer directly into the upper longword of the mantissa along -# w/ an exponent value of 0x401e. we convert this to extended precision here. - bfextu %d0{&19:&3},%d0 # fetch instr size - bne.b finex_cont # instr size is not long - cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e? - bne.b finex_cont # no - fmov.l &0x0,%fpcr - fmov.l FP_SRC_HI(%a6),%fp0 # load integer src - fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision - mov.w &0xe001,0x2+FP_SRC(%a6) - -finex_cont: - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -# Here, we zero the ccode and exception byte field since we're going to -# emulate the whole instruction. Notice, though, that we don't kill the -# INEX1 bit. This is because a packed op has long since been converted -# to extended before arriving here. Therefore, we need to retain the -# INEX1 bit from when the operand was first converted. - andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field - - fmov.l &0x0,%fpcr # zero current control regs - fmov.l &0x0,%fpsr - - bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg - cmpi.b %d1,&0x17 # is op an fmovecr? - beq.w finex_fmovcr # yes - - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l set_tag_x # tag the operand type - mov.b %d0,STAG(%a6) # maybe NORM,DENORM - -# bits four and five of the fp extension word separate the monadic and dyadic -# operations that can pass through fpsp_inex(). remember that fcmp and ftst -# will never take this exception, but fsincos will. - btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic? - beq.b finex_extract # monadic - - btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos? - bne.b finex_extract # yes - - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg - bsr.l load_fpn2 # load dst into FP_DST - - lea FP_DST(%a6),%a0 # pass: ptr to dst op - bsr.l set_tag_x # tag the operand type - cmpi.b %d0,&UNNORM # is operand an UNNORM? 
- bne.b finex_op2_done # no - bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO -finex_op2_done: - mov.b %d0,DTAG(%a6) # save dst optype tag - -finex_extract: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x007f,%d1 # extract extension - - lea FP_SRC(%a6),%a0 - lea FP_DST(%a6),%a1 - - mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr - jsr (tbl_unsupp.l,%pc,%d1.l*1) - -# the operation has been emulated. the result is in fp0. -finex_save: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 - bsr.l store_fpreg - -finex_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_inex - -finex_fmovcr: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.l &0x0000007f,%d1 # pass rom offset - bsr.l smovcr - bra.b finex_save - -######################################################################## - -# -# the hardware does not save the default result to memory on enabled -# inexact exceptions. we do this here before passing control to -# the user inexact handler. -# -# byte, word, and long destination format operations can pass -# through here. so can double and single precision. -# although packed opclass three operations can take inexact -# exceptions, they won't pass through here since they are caught -# first by the unsupported data format exception handler. that handler -# sends them directly to _real_inex() if necessary. -# -finex_out: - - mov.b &NORM,STAG(%a6) # src is a NORM - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode - - andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field - - lea FP_SRC(%a6),%a0 # pass ptr to src operand - - bsr.l fout # store the default result - - bra.b finex_exit - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_dz(): 060FPSP entry point for FP DZ exception. # -# # -# This handler should be the first code executed upon taking # -# the FP DZ exception in an operating system. # -# # -# XREF **************************************************************** # -# _imem_read_long() - read instruction longword from memory # -# fix_skewed_ops() - adjust fsave operand # -# _real_dz() - "callout" exit point from FP DZ handler # -# # -# INPUT *************************************************************** # -# - The system stack contains the FP DZ exception stack. # -# - The fsave frame contains the source operand. # -# # -# OUTPUT ************************************************************** # -# - The system stack contains the FP DZ exception stack. # -# - The fsave frame contains the adjusted source operand. # -# # -# ALGORITHM *********************************************************** # -# In a system where the DZ exception is enabled, the goal is to # -# get to the handler specified at _real_dz(). But, on the 060, when the # -# exception is taken, the input operand in the fsave state frame may # -# be incorrect for some cases and need to be adjusted. So, this package # -# adjusts the operand using fix_skewed_ops() and then branches to # -# _real_dz(). 
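Stripped of the register shuffling, the DZ handler that follows reduces to a few steps. The C sketch below models them under assumed names: imem_read_long, fix_skewed_op, and real_dz are illustrative stand-ins for the package's callouts and helpers, and the frame size is a placeholder rather than the documented fsave layout.

#include <stdint.h>

extern uint32_t imem_read_long(uint32_t addr);
extern void fix_skewed_op(uint8_t *fsave_frame);
extern void real_dz(void);

struct dz_ctx {
	uint8_t  fsave_frame[0x60];	/* "busy" frame captured by fsave */
	uint32_t fpiar;			/* current PC of the faulting instr */
	uint32_t opword;
};

static void fpsp_dz_model(struct dz_ctx *ctx)
{
	/* fetch the instruction words for reference, as the handler does */
	ctx->opword = imem_read_long(ctx->fpiar);
	/* adjust a skewed sgl/dbl zero source operand in place */
	fix_skewed_op(ctx->fsave_frame);
	/* the real code then does frestore and exits via _real_dz() */
	real_dz();
}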
# -# # -######################################################################### - - global _fpsp_dz -_fpsp_dz: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - fsave FP_SRC(%a6) # grab the "busy" frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack - -# the FPIAR holds the "current PC" of the faulting instruction - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################## - - -# here, we simply see if the operand in the fsave frame needs to be "unskewed". -# this would be the case for opclass two operations with a source zero -# in the sgl or dbl format. - lea FP_SRC(%a6),%a0 # pass: ptr to src op - bsr.l fix_skewed_ops # fix src op - -fdz_exit: - fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) - - unlk %a6 - bra.l _real_dz - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. # -# # -# This handler should be the first code executed upon taking the # -# "Line F Emulator" exception in an operating system. # -# # -# XREF **************************************************************** # -# _fpsp_unimp() - handle "FP Unimplemented" exceptions # -# _real_fpu_disabled() - handle "FPU disabled" exceptions # -# _real_fline() - handle "FLINE" exceptions # -# _imem_read_long() - read instruction longword # -# # -# INPUT *************************************************************** # -# - The system stack contains a "Line F Emulator" exception # -# stack frame. # -# # -# OUTPUT ************************************************************** # -# - The system stack is unchanged # -# # -# ALGORITHM *********************************************************** # -# When a "Line F Emulator" exception occurs, there are 3 possible # -# exception types, denoted by the exception stack frame format number: # -# (1) FPU unimplemented instruction (6 word stack frame) # -# (2) FPU disabled (8 word stack frame) # -# (3) Line F (4 word stack frame) # -# # -# This module determines which and forks the flow off to the # -# appropriate "callout" (for "disabled" and "Line F") or to the # -# correct emulation code (for "FPU unimplemented"). # -# This code also must check for "fmovecr" instructions w/ a # -# non-zero <ea> field. These may get flagged as "Line F" but should # -# really be flagged as "FPU Unimplemented". (This is a "feature" on # -# the '060.) # -# # -######################################################################### - - global _fpsp_fline -_fpsp_fline: - -# check to see if this exception is a "FP Unimplemented Instruction" -# exception. if so, branch directly to that handler's entry point. - cmpi.w 0x6(%sp),&0x202c - beq.l _fpsp_unimp - -# check to see if the FPU is disabled. if so, jump to the OS entry -# point for that condition. - cmpi.w 0x6(%sp),&0x402c - beq.l _real_fpu_disabled - -# the exception was an "F-Line Illegal" exception. we check to see -# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. 
if -# so, convert the F-Line exception stack frame to an FP Unimplemented -# Instruction exception stack frame else branch to the OS entry -# point for the F-Line exception handler. - link.w %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - - mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch instruction words - - bfextu %d0{&0:&10},%d1 # is it an fmovecr? - cmpi.w %d1,&0x03c8 - bne.b fline_fline # no - - bfextu %d0{&16:&6},%d1 # is it an fmovecr? - cmpi.b %d1,&0x17 - bne.b fline_fline # no - -# it's an fmovecr w/ a non-zero <ea> that has entered through -# the F-Line Illegal exception. -# so, we need to convert the F-Line exception stack frame into an -# FP Unimplemented Instruction stack frame and jump to that entry -# point. -# -# but, if the FPU is disabled, then we need to jump to the FPU disabled -# entry point. - movc %pcr,%d0 - btst &0x1,%d0 - beq.b fline_fmovcr - - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - sub.l &0x8,%sp # make room for "Next PC", - mov.w 0x8(%sp),(%sp) - mov.l 0xa(%sp),0x2(%sp) # move "Current PC" - mov.w &0x402c,0x6(%sp) - mov.l 0x2(%sp),0xc(%sp) - addq.l &0x4,0x2(%sp) # set "Next PC" - - bra.l _real_fpu_disabled - -fline_fmovcr: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - fmov.l 0x2(%sp),%fpiar # set current PC - addq.l &0x4,0x2(%sp) # set Next PC - - mov.l (%sp),-(%sp) - mov.l 0x8(%sp),0x4(%sp) - mov.b &0x20,0x6(%sp) - - bra.l _fpsp_unimp - -fline_fline: - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - - bra.l _real_fline - -######################################################################### -# XDEF **************************************************************** # -# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented # -# Instruction" exception. # -# # -# This handler should be the first code executed upon taking the # -# FP Unimplemented Instruction exception in an operating system. 
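The fmovecr screen in the Line-F handler above packs two bit-field tests into bfextu instructions. A C model of the same checks may make them easier to verify; the is_fmovecr helper is hypothetical, and insn is again assumed to be the combined opword/extension longword fetched by _imem_read_long.

#include <stdint.h>

static int is_fmovecr(uint32_t insn)
{
	/* bfextu %d0{&0:&10}: the top 10 bits must equal
	 * 0xf200 >> 6 = 0x3c8, i.e. the fmovecr opword */
	if (((insn >> 22) & 0x3ff) != 0x03c8)
		return 0;
	/* bfextu %d0{&16:&6}: the top 6 bits of the extension word
	 * must be 0x17, the fmovecr command prefix */
	return ((insn >> 10) & 0x3f) == 0x17;
}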
# -# # -# XREF **************************************************************** # -# _imem_read_{word,long}() - read instruction word/longword # -# load_fop() - load src/dst ops from memory and/or FP regfile # -# store_fpreg() - store opclass 0 or 2 result to FP regfile # -# tbl_trans - addr of table of emulation routines for trnscndls # -# _real_access() - "callout" for access error exception # -# _fpsp_done() - "callout" for exit; work all done # -# _real_trace() - "callout" for Trace enabled exception # -# smovcr() - emulate "fmovecr" instruction # -# funimp_skew() - adjust fsave src ops to "incorrect" value # -# _ftrapcc() - emulate an "ftrapcc" instruction # -# _fdbcc() - emulate an "fdbcc" instruction # -# _fscc() - emulate an "fscc" instruction # -# _real_trap() - "callout" for Trap exception # -# _real_bsun() - "callout" for enabled Bsun exception # -# # -# INPUT *************************************************************** # -# - The system stack contains the "Unimplemented Instr" stk frame # -# # -# OUTPUT ************************************************************** # -# If access error: # -# - The system stack is changed to an access error stack frame # -# If Trace exception enabled: # -# - The system stack is changed to a Trace exception stack frame # -# Else: (normal case) # -# - Correct result has been stored as appropriate # -# # -# ALGORITHM *********************************************************** # -# There are two main cases of instructions that may enter here to # -# be emulated: (1) the FPgen instructions, most of which were also # -# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". # -# For the first set, this handler calls the routine load_fop() # -# to load the source and destination (for dyadic) operands to be used # -# for instruction emulation. The correct emulation routine is then # -# chosen by decoding the instruction type and indexing into an # -# emulation subroutine index table. After emulation returns, this # -# handler checks to see if an exception should occur as a result of the # -# FP instruction emulation. If so, then an FP exception of the correct # -# type is inserted into the FPU state frame using the "frestore" # -# instruction before exiting through _fpsp_done(). In either the # -# exceptional or non-exceptional cases, we must check to see if the # -# Trace exception is enabled. If so, then we must create a Trace # -# exception frame from the current exception frame and exit through # -# _real_trace(). # -# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines # -# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three # -# may flag that a BSUN exception should be taken. If so, then the # -# current exception stack frame is converted into a BSUN exception # -# stack frame and an exit is made through _real_bsun(). If the # -# instruction was "ftrapcc" and a Trap exception should result, a Trap # -# exception stack frame is created from the current frame and an exit # -# is made through _real_trap(). If a Trace exception is pending, then # -# a Trace exception frame is created from the current frame and a jump # -# is made to _real_trace(). Finally, if none of these conditions exist, # -# then the handler exits through the callout _fpsp_done(). # -# # -# In any of the above scenarios, if a _mem_read() or _mem_write() # -# "callout" returns a failing value, then an access error stack frame # -# is created from the current stack frame and an exit is made through # -# _real_access(). 
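One detail of the algorithm above that recurs throughout the package is the post-emulation exception scan (the bfffo idiom in funimp_ena and iea_op_ena): AND the FPSR exception byte with the FPCR enable byte and take the highest set bit. A hedged C model follows; the function name is hypothetical and the bit numbering simply mirrors the byte the assembly scans.

#include <stdint.h>

/* Returns the bit number of the highest-priority pending-and-enabled
 * exception (7 = BSUN down to 0 = INEX1), or -1 if none is pending. */
static int highest_enabled_exc(uint8_t fpsr_except, uint8_t fpcr_enable)
{
	uint8_t pending = fpsr_except & fpcr_enable;	/* and.b */
	int bit;

	for (bit = 7; bit >= 0; bit--)			/* bfffo scan */
		if (pending & (1u << bit))
			return bit;
	return -1;
}

The returned bit then indexes an 8-entry table of fsave status words (tbl_iea_except / tbl_funimp_except) that is stuffed into the frame before frestore re-raises the exception.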
# -# # -######################################################################### - -# -# FP UNIMPLEMENTED INSTRUCTION STACK FRAME: -# -# ***************** -# * * => <ea> of fp unimp instr. -# - EA - -# * * -# ***************** -# * 0x2 * 0x02c * => frame format and vector offset(vector #11) -# ***************** -# * * -# - Next PC - => PC of instr to execute after exc handling -# * * -# ***************** -# * SR * => SR at the time the exception was taken -# ***************** -# -# Note: the !NULL bit does not get set in the fsave frame when the -# machine encounters an fp unimp exception. Therefore, it must be set -# before leaving this handler. -# - global _fpsp_unimp -_fpsp_unimp: - - link.w %a6,&-LOCAL_SIZE # init stack frame - - movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 - fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs - fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 - - btst &0x5,EXC_SR(%a6) # user mode exception? - bne.b funimp_s # no; supervisor mode - -# save the value of the user stack pointer onto the stack frame -funimp_u: - mov.l %usp,%a0 # fetch user stack pointer - mov.l %a0,EXC_A7(%a6) # store in stack frame - bra.b funimp_cont - -# store the value of the supervisor stack pointer BEFORE the exc occurred. -# old_sp is address just above stacked effective address. -funimp_s: - lea 4+EXC_EA(%a6),%a0 # load old a7' - mov.l %a0,EXC_A7(%a6) # store a7' - mov.l %a0,OLD_A7(%a6) # make a copy - -funimp_cont: - -# the FPIAR holds the "current PC" of the faulting instruction. - mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) - - mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr - addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr - bsr.l _imem_read_long # fetch the instruction words - mov.l %d0,EXC_OPWORD(%a6) - -############################################################################ - - fmov.l &0x0,%fpcr # clear FPCR - fmov.l &0x0,%fpsr # clear FPSR - - clr.b SPCOND_FLG(%a6) # clear "special case" flag - -# Divide the fp instructions into 8 types based on the TYPE field in -# bits 6-8 of the opword(classes 6,7 are undefined). -# (for the '060, only two types can take this exception) -# bftst %d0{&7:&3} # test TYPE - btst &22,%d0 # type 0 or 1 ? - bne.w funimp_misc # type 1 - -######################################### -# TYPE == 0: General instructions # -######################################### -funimp_gen: - - clr.b STORE_FLG(%a6) # clear "store result" flag - -# clear the ccode byte and exception status byte - andi.l &0x00ff00ff,USER_FPSR(%a6) - - bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg - cmpi.b %d1,&0x17 # is op an fmovecr? 
- beq.w funimp_fmovcr # yes - -funimp_gen_op: - bsr.l _load_fop # load - - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode - - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.w &0x003f,%d1 # extract extension bits - lsl.w &0x3,%d1 # shift left 3 bits - or.b STAG(%a6),%d1 # insert src optag bits - - lea FP_DST(%a6),%a1 # pass dst ptr in a1 - lea FP_SRC(%a6),%a0 # pass src ptr in a0 - - mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 - jsr (tbl_trans.w,%pc,%d1.w*1) # emulate - -funimp_fsave: - mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled - bne.w funimp_ena # some are enabled - -funimp_store: - bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn - bsr.l store_fpreg # store result to fp regfile - -funimp_gen_exit: - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - -funimp_gen_exit_cmp: - cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ? - beq.b funimp_gen_exit_a7 # yes - - cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ? - beq.b funimp_gen_exit_a7 # yes - -funimp_gen_exit_cont: - unlk %a6 - -funimp_gen_exit_cont2: - btst &0x7,(%sp) # is trace on? - beq.l _fpsp_done # no - -# this catches a problem with the case where an exception will be re-inserted -# into the machine. the frestore has already been executed...so, the fmov.l -# alone of the control register would trigger an unwanted exception. -# until I feel like fixing this, we'll sidestep the exception. - fsave -(%sp) - fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR - frestore (%sp)+ - mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24 - bra.l _real_trace - -funimp_gen_exit_a7: - btst &0x5,EXC_SR(%a6) # supervisor or user mode? - bne.b funimp_gen_exit_a7_s # supervisor - - mov.l %a0,-(%sp) - mov.l EXC_A7(%a6),%a0 - mov.l %a0,%usp - mov.l (%sp)+,%a0 - bra.b funimp_gen_exit_cont - -# if the instruction was executed from supervisor mode and the addressing -# mode was (a7)+, then the stack frame for the rte must be shifted "up" -# "n" bytes where "n" is the size of the src operand type. -# f<op>.{b,w,l,s,d,x,p} -funimp_gen_exit_a7_s: - mov.l %d0,-(%sp) # save d0 - mov.l EXC_A7(%a6),%d0 # load new a7' - sub.l OLD_A7(%a6),%d0 # subtract old a7' - mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame - mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame - mov.w %d0,EXC_SR(%a6) # store incr number - mov.l (%sp)+,%d0 # restore d0 - - unlk %a6 - - add.w (%sp),%sp # stack frame shifted - bra.b funimp_gen_exit_cont2 - -###################### -# fmovecr.x #ccc,fpn # -###################### -funimp_fmovcr: - clr.l %d0 - mov.b FPCR_MODE(%a6),%d0 - mov.b 1+EXC_CMDREG(%a6),%d1 - andi.l &0x0000007f,%d1 # pass rom offset in d1 - bsr.l smovcr - bra.w funimp_fsave - -######################################################################### - -# -# the user has enabled some exceptions. we figure not to see this too -# often so that's why it gets lower priority. -# -funimp_ena: - -# was an exception set that was also enabled? - and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set - bfffo %d0{&24:&8},%d0 # find highest priority exception - bne.b funimp_exc # at least one was set - -# no exception that was enabled was set BUT if we got an exact overflow -# and overflow wasn't enabled but inexact was (yech!) then this is -# an inexact exception; otherwise, return to normal non-exception flow. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? 
- beq.w funimp_store # no; return to normal flow - -# the overflow w/ exact result happened but was inexact enabled in the FPCR? -funimp_ovfl: - btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? - beq.w funimp_store # no; return to normal flow - bra.b funimp_exc_ovfl # yes - -# some exception happened that was actually enabled. -# we'll insert this new exception into the FPU and then return. -funimp_exc: - subi.l &24,%d0 # fix offset to be 0-7 - cmpi.b %d0,&0x6 # is exception INEX? - bne.b funimp_exc_force # no - -# the enabled exception was inexact. so, if it occurs with an overflow -# or underflow that was disabled, then we have to force an overflow or -# underflow frame. the eventual overflow or underflow handler will see that -# it's actually an inexact and act appropriately. this is the only easy -# way to have the EXOP available for the enabled inexact handler when -# a disabled overflow or underflow has also happened. - btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur? - bne.b funimp_exc_ovfl # yes - btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur? - bne.b funimp_exc_unfl # yes - -# force the fsave exception status bits to signal an exception of the -# appropriate type. don't forget to "skew" the source operand in case we -# "unskewed" the one the hardware initially gave us. -funimp_exc_force: - mov.l %d0,-(%sp) # save d0 - bsr.l funimp_skew # check for special case - mov.l (%sp)+,%d0 # restore d0 - mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) - bra.b funimp_gen_exit2 # exit with frestore - -tbl_funimp_except: - short 0xe002, 0xe006, 0xe004, 0xe005 - short 0xe003, 0xe002, 0xe001, 0xe001 - -# insert an overflow frame -funimp_exc_ovfl: - bsr.l funimp_skew # check for special case - mov.w &0xe005,2+FP_SRC(%a6) - bra.b funimp_gen_exit2 - -# insert an underflow frame -funimp_exc_unfl: - bsr.l funimp_skew # check for special case - mov.w &0xe003,2+FP_SRC(%a6) - -# this is the general exit point for an enabled exception that will be -# restored into the machine for the instruction just emulated. -funimp_gen_exit2: - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - frestore FP_SRC(%a6) # insert exceptional status - - bra.w funimp_gen_exit_cmp - -############################################################################ - -# -# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc> -# -# These instructions were implemented on the '881/2 and '040 in hardware but -# are emulated in software on the '060. -# -funimp_misc: - bfextu %d0{&10:&3},%d1 # extract mode field - cmpi.b %d1,&0x1 # is it an fdb<cc>? - beq.w funimp_fdbcc # yes - cmpi.b %d1,&0x7 # is it an fs<cc>? - bne.w funimp_fscc # yes - bfextu %d0{&13:&3},%d1 - cmpi.b %d1,&0x2 # is it an fs<cc>? - blt.w funimp_fscc # yes - -######################### -# ftrap<cc> # -# ftrap<cc>.w #<data> # -# ftrap<cc>.l #<data> # -######################### -funimp_ftrapcc: - - bsr.l _ftrapcc # FTRAP<cc>() - - cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? - beq.w funimp_bsun # yes - - cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur? - bne.w funimp_done # no - -# FP UNIMP FRAME TRAP FRAME -# ***************** ***************** -# ** ** ** Current PC ** -# ***************** ***************** -# * 0x2 * 0x02c * * 0x2 * 0x01c * -# ***************** ***************** -# ** Next PC ** ** Next PC ** -# ***************** ***************** -# * SR * * SR * -# ***************** ***************** -# (6 words) (6 words) -# -# the ftrapcc instruction should take a trap. 
so, here we must create a -# trap stack frame from an unimplemented fp instruction stack frame and -# jump to the user supplied entry point for the trap exception -funimp_ftrapcc_tp: - mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC - mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c - - fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1 - fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs - movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 - - unlk %a6 - bra.l _real_trap - -######################### -# fdb<cc> Dn,